Introduction

In addition to all our SIP fraction sequences, we sequenced the unfractionated samples from all microcosms. For these samples, DNA was simply extracted from the soil using the MoBio PowerMag Microbiome RNA/DNA Isolation Kit (#27500-4-EP), amplified using PCR targeting the V4 region of the 16S rRNA gene (515f-806r) and sequenced. This DNA was not put through a CsCl gradient.

This analysis allows us to see what the microbial communities within the microcosms look like over time and compare between land-use regimes. Note that since we are not fractionated the samples, we should see no effect of substrate. Remember that all microcosms were essentially treated the same in this case where the only difference is which substrates had 13C label. All except for the H2O-Control samples of course.

Initialization

# For data handling
library(dplyr)
library(phyloseq)

# For analysis
library(ape)
library(vegan)
library(lsmeans)

# For plotting
library(ggplot2)

# Set color schemes
eco.col = c(agriculture="#00BA38", meadow="#619CFF", forest="#F8766D")

# Function for getting legends
g_legend<-function(a.gplot){
  tmp <- ggplot_gtable(ggplot_build(a.gplot))
  leg <- which(sapply(tmp$grobs, function(x) x$name) == "guide-box")
  legend <- tmp$grobs[[leg]]
  return(legend)}

Data

All unfractionated sequencing data is found in the master phyloseq object and labeled as “Unfractionated”.

# Import phyloseq object
physeq = readRDS("/Users/sambarnett/Documents/Buckley Lab/FullCyc2/fullcyc2_backups_8_8_19/phyloseq/fullcyc2physeq.RDS")

# Subset to just the unfractionated samples and remove the controls
unfrac.physeq = subset_samples(physeq, exp_type == "Unfractionated" & sample_type == "unknown")
physeq = NULL

# Remove samples MR.F.12C-Con.D0.R2 and MR.M.12C-Con.D0.R2 since these were likely accidently switched. Their correct, resequenced versions are called MR.F.12C-Con.D0.R2_primer2 and MR.M.12C-Con.D0.R2_primer2
unfrac.physeq = subset_samples(unfrac.physeq, !(X.Sample %in% c("MR.F.12C-Con.D0.R2", "MR.M.12C-Con.D0.R2")))

# Remove non-bacteria (aka Archaea)
unfrac.physeq = subset_taxa(unfrac.physeq, Domain == "Bacteria")

# Plot the number of reads recovered from each sample. All samples should be above 3000 so we wont remove any due to low sequencing.
sample_depths.df = data.frame(colSums(otu_table(unfrac.physeq))) %>%
  rename(depth = colSums.otu_table.unfrac.physeq..) %>%
  tibble::rownames_to_column(var="X.Sample") %>%
  left_join(data.frame(sample_data(unfrac.physeq)) %>% select(X.Sample, ecosystem, substrate, day), by = "X.Sample") %>%
  arrange(-depth) %>%
  mutate(rank = row_number())
  
ggplot(data=sample_depths.df, aes(x=rank, y=log10(depth), fill=ecosystem, color=ecosystem)) +
  geom_bar(stat="identity") +
  geom_hline(yintercept = log10(3000)) +
  theme_bw()

# Add in a different phylogenetic tree. The one in the phyloseq might be an older version.
tree = read_tree("/Users/sambarnett/Documents/Buckley Lab/FullCyc2/fullcyc2_backups_8_8_19/fullcyc2.bacteria.cogent.tree")
phy_tree(unfrac.physeq) = tree

# Remove any OTUs no longer found in the samples
unfrac.physeq = prune_taxa(taxa_sums(unfrac.physeq) > 0, unfrac.physeq)

unfrac.physeq
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 15387 taxa and 165 samples ]
## sample_data() Sample Data:       [ 165 samples by 38 sample variables ]
## tax_table()   Taxonomy Table:    [ 15387 taxa by 7 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 15387 tips and 15386 internal nodes ]

For many of the following analyses I want a rarefied OTU table. This is one way to correct for differing sequencing depths across all my samples. I will set the seed for this process so that I can replicate this analysis if necessary (seed = 4242).

unfrac.rare.physeq = rarefy_even_depth(unfrac.physeq, rngseed=4242)
unfrac.rare.physeq
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 10014 taxa and 165 samples ]
## sample_data() Sample Data:       [ 165 samples by 38 sample variables ]
## tax_table()   Taxonomy Table:    [ 10014 taxa by 7 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 10014 tips and 10013 internal nodes ]
rarecurve.df = data.frame()
for (rare_step in seq(1, max(sample_sums(unfrac.physeq)), by = 1000)){
  unfrac.steprare.physeq = rarefy_even_depth(unfrac.physeq, sample.size=rare_step, rngseed=4242)
  stepotu_counts = data.frame(n_OTUs = specnumber(t(otu_table(unfrac.steprare.physeq)))) %>%
    tibble::rownames_to_column(var="microcosm") %>%
    mutate(n_reads = rare_step)
  rarecurve.df = rbind(rarecurve.df, stepotu_counts)
}

ggplot(data=rarecurve.df, aes(x=n_reads, y=n_OTUs, group=microcosm)) +
  geom_line() +
  geom_vline(xintercept = mean(colSums(otu_table(unfrac.rare.physeq)))) +
  theme_bw() + 
  theme(legend.position = "none")

Some basic stats

print(paste("Maximum read count =", max(colSums(otu_table(unfrac.physeq)))))
## [1] "Maximum read count = 79981"
print(paste("Minimum read count =", min(colSums(otu_table(unfrac.physeq)))))
## [1] "Minimum read count = 3302"
print(paste("Rarefied read count =", unique(colSums(otu_table(unfrac.rare.physeq)))))
## [1] "Rarefied read count = 3302"
print(paste("Number of OTUs total =", ntaxa(unfrac.physeq)))
## [1] "Number of OTUs total = 15387"
print(paste("Number of OTUs rarefied =", ntaxa(unfrac.rare.physeq)))
## [1] "Number of OTUs rarefied = 10014"
print(paste("Number of phyla total =", length(unique(filter(data.frame(tax_table(unfrac.physeq), stringsAsFactors = FALSE), !(is.na(Phylum)))$Phylum))))
## [1] "Number of phyla total = 39"
print(paste("Number of phyla total =", length(unique(filter(data.frame(tax_table(unfrac.rare.physeq), stringsAsFactors = FALSE), !(is.na(Phylum)))$Phylum))))
## [1] "Number of phyla total = 34"

Alpha diversity

First I’ll look at the alpha diversity of our samples. Specifically I’ll look at the OTU richness, shannon index, simpson index, and Pielou’s evenness. I will compare these values between land-use regimes and see how they change over time relative to their starting points (day 0).

Get alpha diversity values

Here I’ll calculate the desired alpha diveristy measures.

OTU.table = t(otu_table(unfrac.rare.physeq))
alpha_div.df = data.frame(X.Sample = rownames(OTU.table), 
                          richness = specnumber(OTU.table),
                          shannon = diversity(OTU.table, index="shannon"),
                          simpson = diversity(OTU.table, index="simpson")) %>%
  mutate(evenness = shannon/log(richness)) %>%
  left_join(data.frame(sample_data(unfrac.physeq)) %>% select(X.Sample, ecosystem, substrate, day, microcosm_replicate, DNA_conc__ng_ul),
            by = "X.Sample") %>%
  mutate(ecosystem = factor(ecosystem, levels = c("agriculture", "meadow", "forest")))

Compare alpha diversity between land-use regimes over time

Now I’ll compare these four alpha diversity metrics between the three land use regimes over the course of the experiment. I’ll remove the H2O-Control sample from this experiment however. Note that I am not using a repeated measures here (mixed effect), because each timepoint is a separate microcosms therefore not a repeated measures.

# Remove H2O-Con samples and set day to be a factor
alpha_div_noH2O.df = alpha_div.df %>%
  filter(substrate != "H2O-Con") %>%
  mutate(day = as.factor(day))

# Richness
richness.lm = lm(richness ~ ecosystem*day, data=alpha_div_noH2O.df)
richness.lsmeans = lsmeans(richness.lm, pairwise~ecosystem*day, adjust=NULL)
richness_means.df = data.frame(richness.lsmeans$lsmeans) %>%
  mutate(day = as.numeric(as.character(day)),
         diversity = "richness")
richness_pairwise.df = data.frame(richness.lsmeans$contrasts) %>%
  mutate(padj = p.adjust(p.value, method="BH")) %>%
  tidyr::separate(contrast, into=c("ecosystem_1", "day_1", "ecosystem_2", "day_2"), remove=FALSE) %>%
  filter(ecosystem_1 != ecosystem_2, day_1 == day_2) %>%
  mutate(day = as.numeric(as.character(day_1))) %>%
  dplyr::select(-day_1, -day_2) %>%
  mutate(contrast = paste(ecosystem_1, ecosystem_2, sep="-"),
         diversity = "richness")

# Shannon index
shannon.lm = lm(shannon ~ ecosystem*day, data=alpha_div_noH2O.df)
shannon.lsmeans = lsmeans(shannon.lm, pairwise~ecosystem*day, adjust=NULL)
shannon_means.df = data.frame(shannon.lsmeans$lsmeans) %>%
  mutate(day = as.numeric(as.character(day)),
         diversity = "shannon")
shannon_pairwise.df = data.frame(shannon.lsmeans$contrasts) %>%
  mutate(padj = p.adjust(p.value, method="BH")) %>%
  tidyr::separate(contrast, into=c("ecosystem_1", "day_1", "ecosystem_2", "day_2"), remove=FALSE) %>%
  filter(ecosystem_1 != ecosystem_2, day_1 == day_2) %>%
  mutate(day = as.numeric(as.character(day_1))) %>%
  dplyr::select(-day_1, -day_2) %>%
  mutate(contrast = paste(ecosystem_1, ecosystem_2, sep="-"),
         diversity = "shannon")

# Simpson index
simpson.lm = lm(simpson ~ ecosystem*day, data=alpha_div_noH2O.df)
simpson.lsmeans = lsmeans(simpson.lm, pairwise~ecosystem*day, adjust=NULL)
simpson_means.df = data.frame(simpson.lsmeans$lsmeans) %>%
  mutate(day = as.numeric(as.character(day)),
         diversity = "simpson")
simpson_pairwise.df = data.frame(simpson.lsmeans$contrasts) %>%
  mutate(padj = p.adjust(p.value, method="BH")) %>%
  tidyr::separate(contrast, into=c("ecosystem_1", "day_1", "ecosystem_2", "day_2"), remove=FALSE) %>%
  filter(ecosystem_1 != ecosystem_2, day_1 == day_2) %>%
  mutate(day = as.numeric(as.character(day_1))) %>%
  dplyr::select(-day_1, -day_2) %>%
  mutate(contrast = paste(ecosystem_1, ecosystem_2, sep="-"),
         diversity = "simpson")

# Evenness
evenness.lm = lm(evenness ~ ecosystem*day, data=alpha_div_noH2O.df)
evenness.lsmeans = lsmeans(evenness.lm, pairwise~ecosystem*day, adjust=NULL)
evenness_means.df = data.frame(evenness.lsmeans$lsmeans) %>%
  mutate(day = as.numeric(as.character(day)),
         diversity = "evenness")
evenness_pairwise.df = data.frame(evenness.lsmeans$contrasts) %>%
  mutate(padj = p.adjust(p.value, method="BH")) %>%
  tidyr::separate(contrast, into=c("ecosystem_1", "day_1", "ecosystem_2", "day_2"), remove=FALSE) %>%
  filter(ecosystem_1 != ecosystem_2, day_1 == day_2) %>%
  mutate(day = as.numeric(as.character(day_1))) %>%
  dplyr::select(-day_1, -day_2) %>%
  mutate(contrast = paste(ecosystem_1, ecosystem_2, sep="-"),
         diversity = "evenness")

anova(richness.lm)
## Analysis of Variance Table
## 
## Response: richness
##                Df  Sum Sq Mean Sq F value    Pr(>F)    
## ecosystem       2 3661149 1830574 646.375 < 2.2e-16 ***
## day             5  357403   71481  25.240 < 2.2e-16 ***
## ecosystem:day  10  463468   46347  16.365 < 2.2e-16 ***
## Residuals     138  390824    2832                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(shannon.lm)
## Analysis of Variance Table
## 
## Response: shannon
##                Df Sum Sq Mean Sq F value    Pr(>F)    
## ecosystem       2 44.966 22.4829 676.537 < 2.2e-16 ***
## day             5 12.753  2.5507  76.753 < 2.2e-16 ***
## ecosystem:day  10 18.508  1.8508  55.694 < 2.2e-16 ***
## Residuals     138  4.586  0.0332                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(simpson.lm)
## Analysis of Variance Table
## 
## Response: simpson
##                Df   Sum Sq  Mean Sq F value    Pr(>F)    
## ecosystem       2 0.086573 0.043287 221.833 < 2.2e-16 ***
## day             5 0.045568 0.009114  46.705 < 2.2e-16 ***
## ecosystem:day  10 0.080140 0.008014  41.070 < 2.2e-16 ***
## Residuals     138 0.026928 0.000195                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(evenness.lm)
## Analysis of Variance Table
## 
## Response: evenness
##                Df  Sum Sq  Mean Sq F value    Pr(>F)    
## ecosystem       2 0.55964 0.279820 488.877 < 2.2e-16 ***
## day             5 0.20958 0.041915  73.231 < 2.2e-16 ***
## ecosystem:day  10 0.31557 0.031557  55.133 < 2.2e-16 ***
## Residuals     138 0.07899 0.000572                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Plot these results

alpha_means.df = rbind(richness_means.df, shannon_means.df, simpson_means.df, evenness_means.df) %>%
  mutate(diversity = factor(diversity, levels = c("richness", "shannon", "simpson", "evenness")))
alpha_pairwise.df = rbind(richness_pairwise.df, shannon_pairwise.df, simpson_pairwise.df, evenness_pairwise.df) %>%
  mutate(diversity = factor(diversity, levels = c("richness", "shannon", "simpson", "evenness")))

# dataframe with the y-coordinates for the significance symbols for each substrate
sig_y.df = data.frame(diversity = factor(c("richness", "richness", "richness",
                                           "shannon", "shannon", "shannon",
                                           "simpson", "simpson", "simpson",
                                           "evenness", "evenness", "evenness"),
                                         levels = c("richness", "shannon", "simpson", "evenness")),
                      contrast = c("agriculture-meadow", "agriculture-forest", "meadow-forest"),
                      label=c("CL - OF", "CL - F", "OF - F"),
                      y = c(1310, 1230, 1150, 7.5, 7.1, 6.7, 1.075, 1.05, 1.025, 1.07, 1.02, 0.97),
                      xend = 30)
alpha_pairwise.df = left_join(alpha_pairwise.df, sig_y.df, by = c("contrast", "diversity"))

sig.lab = data.frame(substrate = as.factor(c("13C-Xyl", "13C-Xyl", "13C-Xyl")),
                     label = c("Top: agriculture-meadow", "Middle: agriculture-forest", "Bottom: meadow-forest"),
                     y = c(0.04, 0.025, 0.01),
                     x = 20)

alpha_pairwise.plot = ggplot(data = filter(alpha_means.df, diversity != "simpson"), aes(x=day, y=lsmean)) +
  geom_line(aes(color=ecosystem)) +
  geom_errorbar(aes(ymin=lower.CL, ymax=upper.CL), width=0.5) +
  geom_point(aes(color=ecosystem), alpha=0.7) +
  geom_segment(data=filter(sig_y.df, diversity != "simpson"), x=0, aes(xend=xend, y=y, yend=y), color="grey70") +
  geom_point(data=alpha_pairwise.df[alpha_pairwise.df$contrast == "agriculture-meadow" & alpha_pairwise.df$padj < 0.05 & alpha_pairwise.df$diversity != "simpson",], 
             aes(x=day, y=y), color="black", shape=18, size=2) +
  geom_point(data=alpha_pairwise.df[alpha_pairwise.df$contrast == "agriculture-forest" & alpha_pairwise.df$padj < 0.05 & alpha_pairwise.df$diversity != "simpson",], 
             aes(x=day, y=y), color="black", shape=18, size=2) +
  geom_point(data=alpha_pairwise.df[alpha_pairwise.df$contrast == "meadow-forest" & alpha_pairwise.df$padj < 0.05 & alpha_pairwise.df$diversity != "simpson",], 
             aes(x=day, y=y), color="black", shape=18, size=2) +
  geom_text(data=filter(sig_y.df, diversity != "simpson"), aes(label=label, y=y), x=30.5, hjust=0) +
  scale_color_manual(values = eco.col, labels = c("agriculture" = "Cropland", "meadow" = "Old-Field", "forest" = "Forest")) +
  lims(x=c(-0.5, 33)) +
  labs(x="Time since substrate addition (days)", y="Diversity",
       color="Land-use") +
  theme_bw() +
  theme(legend.position = "top",
        legend.title=element_text(size=14),
        legend.text=element_text(size=12),
        axis.title = element_text(size=14),
        axis.text = element_text(size=12),
        strip.text = element_text(size=10)) +
  facet_wrap(~diversity, ncol=1, scales="free_y")

alpha_pairwise.plot

#ggsave(alpha_pairwise.plot, filename = "/Users/sambarnett/Documents/Dissertation/figures/figS2_5.tiff", 
#       device = "tiff", width = 6.69291, height = 6.69291, units = "in")

Compare alpha diversity over time to timepoint 0 for each land use

Now I want to see how much the alpha diversity diverges from its starting point before substrate addition. For this analysis I’ll be comparing each diversity metric from each day to the baseline of the average diversity on day 0. I will do this separately for each land use regime. I will also see if the H2O-Control sample from day 30 differs much from this timepoint 0.

# Get day 0 measure
alpha_div_post.df = alpha_div.df %>%
  tidyr::gather(key="diversity", value="measure", -X.Sample, -ecosystem, -substrate, -day, -microcosm_replicate, -DNA_conc__ng_ul) %>%
  mutate(day0_measure = ifelse(day == 0, measure, NA),
         treatment = factor(ifelse(substrate == "H2O-Con", "Water", "Carbon"), levels=c("Carbon", "Water")),
         diversity = factor(diversity, levels = c("richness", "shannon", "simpson", "evenness"))) %>%
  group_by(ecosystem, diversity) %>%
  mutate(day0_measure = mean(day0_measure, na.rm = TRUE)) %>%
  ungroup

# Run a one sample t-test for each day, land-use, and diversity metric combinations
alpha_ttest.df = data.frame()

for (eco in c("agriculture", "meadow", "forest")){
  for (div in c("richness", "shannon", "simpson", "evenness")){
    for (dia in c(1,3,6,14,30)){
    treat = "Carbon"
    sub.df = filter(alpha_div_post.df, day==dia, treatment==treat, ecosystem==eco, diversity==div)
    sub.ttest = t.test(sub.df$measure, mu=unique(sub.df$day0_measure))
    alpha_ttest.df = rbind(alpha_ttest.df, data.frame(ecosystem = eco, day = dia, treatment = treat, diversity = div,
                                                      t = sub.ttest$statistic, df = sub.ttest$parameter, 
                                                      pvalue = sub.ttest$p.value, mean = sub.ttest$estimate, 
                                                      lower.CL = sub.ttest$conf.int[1], upper.CL = sub.ttest$conf.int[2]))
    }
    treat = "Water"
    dia = 30
    sub.df = filter(alpha_div_post.df, day==dia, treatment==treat, ecosystem==eco, diversity==div)
    sub.ttest = t.test(sub.df$measure, mu=unique(sub.df$day0_measure))
    alpha_ttest.df = rbind(alpha_ttest.df, data.frame(ecosystem = eco, day = dia, treatment = treat, diversity = div,
                                                      t = sub.ttest$statistic, df = sub.ttest$parameter, 
                                                      pvalue = sub.ttest$p.value, mean = sub.ttest$estimate, 
                                                      lower.CL = sub.ttest$conf.int[1], upper.CL = sub.ttest$conf.int[2]))
  }
}

alpha_ttest.df = alpha_ttest.df %>%
  group_by(diversity) %>%
  mutate(padj = p.adjust(pvalue, method="BH")) %>%
  ungroup %>%
  mutate(sig = ifelse(padj < 0.05, "Significant", "Non-significant"),
         diversity = factor(diversity, levels = c("richness", "shannon", "simpson", "evenness")),
         treatment = factor(treatment, levels=c("Carbon", "Water")))

alpha_ttest.df
## # A tibble: 72 x 12
##    ecosystem   day treatment diversity      t    df   pvalue   mean lower.CL
##    <fct>     <dbl> <fct>     <fct>      <dbl> <dbl>    <dbl>  <dbl>    <dbl>
##  1 agricult…     1 Carbon    richness   -6.86     5 1.01e- 3 940.     911.  
##  2 agricult…     3 Carbon    richness  -12.9      5 5.05e- 5 553.     460.  
##  3 agricult…     6 Carbon    richness  -34.0     11 1.68e-12 607.     581.  
##  4 agricult…    14 Carbon    richness  -28.3     13 4.60e-13 692.     667.  
##  5 agricult…    30 Carbon    richness  -26.2     11 2.96e-11 710.     684.  
##  6 agricult…    30 Water     richness   -4.03     2 5.63e- 2 904      782.  
##  7 agricult…     1 Carbon    shannon    -7.23     5 7.88e- 4   6.12     6.07
##  8 agricult…     3 Carbon    shannon   -11.6      5 8.19e- 5   3.64     3.07
##  9 agricult…     6 Carbon    shannon   -20.9     11 3.30e-10   4.15     3.93
## 10 agricult…    14 Carbon    shannon   -20.4     13 2.90e-11   4.95     4.81
## # … with 62 more rows, and 3 more variables: upper.CL <dbl>, padj <dbl>,
## #   sig <chr>

Now I’ll plot the results.

alpha_ttest.LU.df = left_join(alpha_ttest.df, 
                           data.frame(ecosystem = c("agriculture", "meadow", "forest"),
                                      landuse = factor(c("Cropland", "Old-field", "Forest"), 
                                                       levels=c("Cropland", "Old-field", "Forest"))),
                           by = "ecosystem")
alpha_div_post.LU.df = left_join(alpha_div_post.df, 
                                 data.frame(ecosystem = c("agriculture", "meadow", "forest"),
                                            landuse = factor(c("Cropland", "Old-field", "Forest"), 
                                                             levels=c("Cropland", "Old-field", "Forest"))),
                                 by = "ecosystem")

alpha_to0.plot = ggplot(data=filter(alpha_ttest.LU.df, diversity != "simpson"), aes(x=day, y=mean)) +
  geom_hline(data=filter(unique(select(alpha_div_post.LU.df, landuse, diversity, day0_measure)), diversity != "simpson"),
             aes(yintercept=day0_measure), linetype=2) +
  geom_line(data=filter(alpha_ttest.LU.df[alpha_ttest.LU.df$treatment=="Carbon",], diversity != "simpson")) +
  geom_errorbar(aes(ymin=lower.CL, ymax=upper.CL), width=1) +
  geom_point(aes(fill=sig, shape=treatment), size=3) +
  scale_fill_manual(values = c("Significant" = "black", "Non-significant" = "white")) +
  scale_shape_manual(values = c(Water=24, Carbon = 21)) +
  labs(x="Time since substrate addition (days)", y="Diversity") +
  theme_bw() +
  theme(legend.position = "none",
        legend.title=element_text(size=14),
        legend.text=element_text(size=12),
        axis.title = element_text(size=14),
        axis.text = element_text(size=12),
        strip.text = element_text(size=14)) +
  facet_grid(diversity~landuse, scales = "free_y")

alpha_to0.plot

#ggsave(alpha_to0.plot, filename = "/Users/sambarnett/Documents/Dissertation/figures/figS2_6.tiff", 
#       device = "tiff", width = 6.69291, height = 6.69291, units = "in")

Beta diversity

So we saw that alpha diversity varies across land-use and time. Now I want to see how the community compositions vary between land-use regimes and across time after substrate addition. For this analysis I will look at:

  • Ordination: How compositionally dissimilar are all the microcosms and is there a pattern.
  • Resistance/Resiliance: comparing composition at each timepoint to timepoint 0.
  • Convergence/Divergence: comparing composition across land-use regimes over time.

Get compositional distance measures

The first thing I need to do is measure the distance or dissimilarity between all microcosm communities. I will use three metrics for this: Bray-Curtis dissimilarity, unweighted UniFrac distance, and weighted UniFrac.

unfrac_BC.dist = vegdist(t(otu_table(unfrac.rare.physeq)), method="bray", binary=FALSE, diag=TRUE, upper=TRUE)
unfrac_uwUF.dist = distance(unfrac.rare.physeq, method="unifrac")
unfrac_wUF.dist = distance(unfrac.rare.physeq, method="wunifrac")

Ordinate

Now I’ll make ordinations based on the distance measures and look for a pattern. For this I’ll use NMDS to be consistent across all metrics.

# Run the NMDS ordinations
set.seed(4242)
unfrac_BC.ord = metaMDS(unfrac_BC.dist, trymax = 1000)
## Run 0 stress 0.05914022 
## Run 1 stress 0.05914015 
## ... New best solution
## ... Procrustes: rmse 0.0003976242  max resid 0.004760278 
## ... Similar to previous best
## Run 2 stress 0.05914348 
## ... Procrustes: rmse 0.0002630298  max resid 0.001646961 
## ... Similar to previous best
## Run 3 stress 0.05913834 
## ... New best solution
## ... Procrustes: rmse 0.0005305894  max resid 0.005630054 
## ... Similar to previous best
## Run 4 stress 0.05914034 
## ... Procrustes: rmse 0.0002865668  max resid 0.002512786 
## ... Similar to previous best
## Run 5 stress 0.05914124 
## ... Procrustes: rmse 0.0003078596  max resid 0.002246905 
## ... Similar to previous best
## Run 6 stress 0.05914042 
## ... Procrustes: rmse 0.0002826469  max resid 0.002286847 
## ... Similar to previous best
## Run 7 stress 0.05914136 
## ... Procrustes: rmse 0.000554491  max resid 0.005678648 
## ... Similar to previous best
## Run 8 stress 0.05913935 
## ... Procrustes: rmse 0.0004819438  max resid 0.005597857 
## ... Similar to previous best
## Run 9 stress 0.05914309 
## ... Procrustes: rmse 0.000543916  max resid 0.00562829 
## ... Similar to previous best
## Run 10 stress 0.05913951 
## ... Procrustes: rmse 0.0002497161  max resid 0.00261592 
## ... Similar to previous best
## Run 11 stress 0.05914315 
## ... Procrustes: rmse 0.0005760401  max resid 0.005670918 
## ... Similar to previous best
## Run 12 stress 0.05914305 
## ... Procrustes: rmse 0.0005586587  max resid 0.005636709 
## ... Similar to previous best
## Run 13 stress 0.05913998 
## ... Procrustes: rmse 0.0004950668  max resid 0.005667864 
## ... Similar to previous best
## Run 14 stress 0.05914111 
## ... Procrustes: rmse 0.0005446641  max resid 0.005608481 
## ... Similar to previous best
## Run 15 stress 0.05913999 
## ... Procrustes: rmse 0.0001813058  max resid 0.001910203 
## ... Similar to previous best
## Run 16 stress 0.05913887 
## ... Procrustes: rmse 0.0004784922  max resid 0.005581082 
## ... Similar to previous best
## Run 17 stress 0.05913735 
## ... New best solution
## ... Procrustes: rmse 9.264754e-05  max resid 0.0008072059 
## ... Similar to previous best
## Run 18 stress 0.05914039 
## ... Procrustes: rmse 0.0002123473  max resid 0.001636732 
## ... Similar to previous best
## Run 19 stress 0.05913911 
## ... Procrustes: rmse 0.0002202349  max resid 0.001526763 
## ... Similar to previous best
## Run 20 stress 0.0591444 
## ... Procrustes: rmse 0.0006004791  max resid 0.006026361 
## ... Similar to previous best
## *** Solution reached
set.seed(4242)
unfrac_uwUF.ord = metaMDS(unfrac_uwUF.dist, trymax = 1000)
## Run 0 stress 0.02489321 
## Run 1 stress 0.02489419 
## ... Procrustes: rmse 0.0001107427  max resid 0.0009647177 
## ... Similar to previous best
## Run 2 stress 0.02489419 
## ... Procrustes: rmse 0.0001072141  max resid 0.0008799464 
## ... Similar to previous best
## Run 3 stress 0.02489368 
## ... Procrustes: rmse 0.0001045403  max resid 0.0009665736 
## ... Similar to previous best
## Run 4 stress 0.0248991 
## ... Procrustes: rmse 0.0002887235  max resid 0.003493072 
## ... Similar to previous best
## Run 5 stress 0.02489325 
## ... Procrustes: rmse 1.739716e-05  max resid 0.0001307099 
## ... Similar to previous best
## Run 6 stress 0.0248935 
## ... Procrustes: rmse 7.646805e-05  max resid 0.0007924833 
## ... Similar to previous best
## Run 7 stress 0.02489961 
## ... Procrustes: rmse 0.0002896331  max resid 0.003517366 
## ... Similar to previous best
## Run 8 stress 0.02489355 
## ... Procrustes: rmse 8.20657e-05  max resid 0.0009236951 
## ... Similar to previous best
## Run 9 stress 0.02489344 
## ... Procrustes: rmse 7.621549e-05  max resid 0.0007885482 
## ... Similar to previous best
## Run 10 stress 0.02489946 
## ... Procrustes: rmse 0.0002871788  max resid 0.003512923 
## ... Similar to previous best
## Run 11 stress 0.02489918 
## ... Procrustes: rmse 0.0002907955  max resid 0.003495008 
## ... Similar to previous best
## Run 12 stress 0.02489929 
## ... Procrustes: rmse 0.0002885055  max resid 0.003503923 
## ... Similar to previous best
## Run 13 stress 0.02489331 
## ... Procrustes: rmse 1.819338e-05  max resid 0.0001310223 
## ... Similar to previous best
## Run 14 stress 0.02489353 
## ... Procrustes: rmse 8.176918e-05  max resid 0.0009245166 
## ... Similar to previous best
## Run 15 stress 0.02489941 
## ... Procrustes: rmse 0.0002818099  max resid 0.00350957 
## ... Similar to previous best
## Run 16 stress 0.02489329 
## ... Procrustes: rmse 1.217705e-05  max resid 0.0001017034 
## ... Similar to previous best
## Run 17 stress 0.02489918 
## ... Procrustes: rmse 0.0002816298  max resid 0.003505723 
## ... Similar to previous best
## Run 18 stress 0.02489916 
## ... Procrustes: rmse 0.0002872557  max resid 0.003450369 
## ... Similar to previous best
## Run 19 stress 0.02489355 
## ... Procrustes: rmse 7.573582e-05  max resid 0.0007661766 
## ... Similar to previous best
## Run 20 stress 0.02489951 
## ... Procrustes: rmse 0.0002911398  max resid 0.00350765 
## ... Similar to previous best
## *** Solution reached
set.seed(4242)
unfrac_wUF.ord = metaMDS(unfrac_wUF.dist, trymax = 1000)
## Run 0 stress 0.102025 
## Run 1 stress 0.1165711 
## Run 2 stress 0.1233004 
## Run 3 stress 0.1165703 
## Run 4 stress 0.1020303 
## ... Procrustes: rmse 0.001063699  max resid 0.008267948 
## ... Similar to previous best
## Run 5 stress 0.115918 
## Run 6 stress 0.1137215 
## Run 7 stress 0.1171678 
## Run 8 stress 0.1145296 
## Run 9 stress 0.1135719 
## Run 10 stress 0.1137748 
## Run 11 stress 0.1149016 
## Run 12 stress 0.1153024 
## Run 13 stress 0.1301713 
## Run 14 stress 0.1103646 
## Run 15 stress 0.1186415 
## Run 16 stress 0.1161794 
## Run 17 stress 0.1137223 
## Run 18 stress 0.1088928 
## Run 19 stress 0.1059812 
## Run 20 stress 0.1171131 
## *** Solution reached
unfrac_BC.ord.df = data.frame(unfrac_BC.ord$points) %>%
  tibble::rownames_to_column(var="X.Sample") %>%
  left_join(data.frame(sample_data(unfrac.physeq)) %>% select(X.Sample, ecosystem, substrate, day),
            by = "X.Sample") %>%
  mutate(ecosystem = factor(ecosystem, levels = c("agriculture", "meadow", "forest")))

unfrac_uwUF.ord.df = data.frame(unfrac_uwUF.ord$points) %>%
  tibble::rownames_to_column(var="X.Sample") %>%
  left_join(data.frame(sample_data(unfrac.physeq)) %>% select(X.Sample, ecosystem, substrate, day),
            by = "X.Sample") %>%
  mutate(ecosystem = factor(ecosystem, levels = c("agriculture", "meadow", "forest")))

unfrac_wUF.ord.df = data.frame(unfrac_wUF.ord$points) %>%
  tibble::rownames_to_column(var="X.Sample") %>%
  left_join(data.frame(sample_data(unfrac.physeq)) %>% select(X.Sample, ecosystem, substrate, day),
            by = "X.Sample") %>%
  mutate(ecosystem = factor(ecosystem, levels = c("agriculture", "meadow", "forest")))

Now I’ll plot. I want to add in arrows showing the direction of community shift over time. These arrows will be based on the centroid of the points for a given day and land use. I also want to show how these communities all relate to the H2O-control so I’ll add those points in as blue colored points.

# Get centroids for plotting the paths
unfrac_meta.df = data.frame(sample_data(unfrac.physeq)) %>%
  mutate(treatment = ifelse(substrate == "H2O-Con", "Water", "Carbon")) %>%
  mutate(group = paste(ecosystem, day, treatment, sep="-"))

prevday.df = data.frame(day=c(0,1,3,6,14,30),
                        prev_day=c(NA,0,1,3,6,14))

unfrac_BC.centroids = data.frame(envfit(unfrac_BC.ord ~ group, data=unfrac_meta.df)$factors$centroids) %>%
  tibble::rownames_to_column(var="group") %>%
  mutate(group = gsub("group", "", group)) %>%
  left_join(unique(select(unfrac_meta.df, ecosystem, day, treatment, group)), by="group") %>%
  left_join(prevday.df, by="day") %>%
  as.data.frame
unfrac_BC.previous.centroids = unfrac_BC.centroids %>%
  select(ecosystem, treatment, NMDS1, NMDS2, day) %>%
  rename(NMDS1_prev = NMDS1, NMDS2_prev = NMDS2, prev_day = day)
unfrac_BC.centroids = left_join(unfrac_BC.centroids, unfrac_BC.previous.centroids, by = c("ecosystem", "treatment", "prev_day")) %>%
  filter(day != 0, treatment == "Carbon") %>%
  mutate(ecosystem = factor(ecosystem, levels = c("agriculture", "meadow", "forest")))


unfrac_BC.ord.plot = ggplot(data=unfrac_BC.ord.df, aes(x=MDS1, y=MDS2)) +
  geom_point(data=unfrac_BC.ord.df[unfrac_BC.ord.df$substrate == "H2O-Con" & unfrac_BC.ord.df$ecosystem == "agriculture",], 
             shape=15, color="blue", size=3) +
  geom_point(data=unfrac_BC.ord.df[unfrac_BC.ord.df$substrate == "H2O-Con" & unfrac_BC.ord.df$ecosystem == "meadow",], 
             shape=16, color="blue", size=3) +
  geom_point(data=unfrac_BC.ord.df[unfrac_BC.ord.df$substrate == "H2O-Con" & unfrac_BC.ord.df$ecosystem == "forest",], 
             shape=17, color="blue", size=3) +
  geom_point(data=unfrac_BC.ord.df[unfrac_BC.ord.df$substrate != "H2O-Con",], 
             aes(fill=day, shape=ecosystem), size=3) +
  geom_segment(data=unfrac_BC.centroids, aes(x=NMDS1_prev, xend=NMDS1, y=NMDS2_prev, yend=NMDS2, color=ecosystem),
               size=1, arrow = arrow(length = unit(0.02, "npc"))) +
  scale_fill_gradient(low="grey90", high="black") +
  scale_color_manual(values = eco.col, labels = c("agriculture" = "Cropland", "meadow" = "Old-Field", "forest" = "Forest")) +
  scale_shape_manual(values=c(agriculture=22, meadow=21, forest=24), labels = c("agriculture" = "Cropland", "meadow" = "Old-Field", "forest" = "Forest")) +
  labs(x="NMDS1", y="NMDS2", fill="Day", shape="Land-use", color="Land-use", title="Bray-Curtis dissimilarity") +
  theme_bw() +
  theme(axis.title=element_text(size=14),
        axis.text=element_text(size=12),
        legend.title=element_text(size=14),
        legend.text=element_text(size=12),
        legend.background = element_blank())

unfrac_BC.ord.plot

# Get centroids for plotting the paths
unfrac_meta.df = data.frame(sample_data(unfrac.physeq)) %>%
  mutate(treatment = ifelse(substrate == "H2O-Con", "Water", "Carbon")) %>%
  mutate(group = paste(ecosystem, day, treatment, sep="-"))

prevday.df = data.frame(day=c(0,1,3,6,14,30),
                        prev_day=c(NA,0,1,3,6,14))

unfrac_uwUF.centroids = data.frame(envfit(unfrac_uwUF.ord ~ group, data=unfrac_meta.df)$factors$centroids) %>%
  tibble::rownames_to_column(var="group") %>%
  mutate(group = gsub("group", "", group)) %>%
  left_join(unique(select(unfrac_meta.df, ecosystem, day, treatment, group)), by="group") %>%
  left_join(prevday.df, by="day") %>%
  as.data.frame
unfrac_uwUF.previous.centroids = unfrac_uwUF.centroids %>%
  select(ecosystem, treatment, NMDS1, NMDS2, day) %>%
  rename(NMDS1_prev = NMDS1, NMDS2_prev = NMDS2, prev_day = day)
unfrac_uwUF.centroids = left_join(unfrac_uwUF.centroids, unfrac_uwUF.previous.centroids, by = c("ecosystem", "treatment", "prev_day")) %>%
  filter(day != 0, treatment == "Carbon") %>%
  mutate(ecosystem = factor(ecosystem, levels = c("agriculture", "meadow", "forest")))


unfrac_uwUF.ord.plot = ggplot(data=unfrac_uwUF.ord.df, aes(x=MDS1, y=MDS2)) +
  geom_point(data=unfrac_uwUF.ord.df[unfrac_uwUF.ord.df$substrate == "H2O-Con" & unfrac_uwUF.ord.df$ecosystem == "agriculture",], 
             shape=15, color="blue", size=3) +
  geom_point(data=unfrac_uwUF.ord.df[unfrac_uwUF.ord.df$substrate == "H2O-Con" & unfrac_uwUF.ord.df$ecosystem == "meadow",], 
             shape=16, color="blue", size=3) +
  geom_point(data=unfrac_uwUF.ord.df[unfrac_uwUF.ord.df$substrate == "H2O-Con" & unfrac_uwUF.ord.df$ecosystem == "forest",], 
             shape=17, color="blue", size=3) +
  geom_point(data=unfrac_uwUF.ord.df[unfrac_uwUF.ord.df$substrate != "H2O-Con",], 
             aes(fill=day, shape=ecosystem), size=3) +
  geom_segment(data=unfrac_uwUF.centroids, aes(x=NMDS1_prev, xend=NMDS1, y=NMDS2_prev, yend=NMDS2, color=ecosystem),
               size=1, arrow = arrow(length = unit(0.02, "npc"))) +
  scale_fill_gradient(low="grey90", high="black") +
  scale_color_manual(values = eco.col, labels = c("agriculture" = "Cropland", "meadow" = "Old-Field", "forest" = "Forest")) +
  scale_shape_manual(values=c(agriculture=22, meadow=21, forest=24), labels = c("agriculture" = "Cropland", "meadow" = "Old-Field", "forest" = "Forest")) +
  labs(x="NMDS1", y="NMDS2", fill="Day", shape="Land-use", color="Land-use", title="Unweighted UniFrac distance") +
  theme_bw() +
  theme(axis.title=element_text(size=14),
        axis.text=element_text(size=12),
        legend.title=element_text(size=14),
        legend.text=element_text(size=12),
        legend.background = element_blank())

unfrac_uwUF.ord.plot

# Get centroids for plotting the paths
unfrac_meta.df = data.frame(sample_data(unfrac.physeq)) %>%
  mutate(treatment = ifelse(substrate == "H2O-Con", "Water", "Carbon")) %>%
  mutate(group = paste(ecosystem, day, treatment, sep="-"))

prevday.df = data.frame(day=c(0,1,3,6,14,30),
                        prev_day=c(NA,0,1,3,6,14))

unfrac_wUF.centroids = data.frame(envfit(unfrac_wUF.ord ~ group, data=unfrac_meta.df)$factors$centroids) %>%
  tibble::rownames_to_column(var="group") %>%
  mutate(group = gsub("group", "", group)) %>%
  left_join(unique(select(unfrac_meta.df, ecosystem, day, treatment, group)), by="group") %>%
  left_join(prevday.df, by="day") %>%
  as.data.frame
unfrac_wUF.previous.centroids = unfrac_wUF.centroids %>%
  select(ecosystem, treatment, NMDS1, NMDS2, day) %>%
  rename(NMDS1_prev = NMDS1, NMDS2_prev = NMDS2, prev_day = day)
unfrac_wUF.centroids = left_join(unfrac_wUF.centroids, unfrac_wUF.previous.centroids, by = c("ecosystem", "treatment", "prev_day")) %>%
  filter(day != 0, treatment == "Carbon") %>%
  mutate(ecosystem = factor(ecosystem, levels = c("agriculture", "meadow", "forest")))

unfrac_wUF.ord.plot = ggplot(data=unfrac_wUF.ord.df, aes(x=MDS1, y=MDS2)) +
  geom_point(data=unfrac_wUF.ord.df[unfrac_wUF.ord.df$substrate == "H2O-Con" & unfrac_wUF.ord.df$ecosystem == "agriculture",], 
             shape=15, color="blue", size=3) +
  geom_point(data=unfrac_wUF.ord.df[unfrac_wUF.ord.df$substrate == "H2O-Con" & unfrac_wUF.ord.df$ecosystem == "meadow",], 
             shape=16, color="blue", size=3) +
  geom_point(data=unfrac_wUF.ord.df[unfrac_wUF.ord.df$substrate == "H2O-Con" & unfrac_wUF.ord.df$ecosystem == "forest",], 
             shape=17, color="blue", size=3) +
  geom_point(data=unfrac_wUF.ord.df[unfrac_wUF.ord.df$substrate != "H2O-Con",], 
             aes(fill=day, shape=ecosystem), size=3) +
  geom_segment(data=unfrac_wUF.centroids, aes(x=NMDS1_prev, xend=NMDS1, y=NMDS2_prev, yend=NMDS2, color=ecosystem),
               size=1, arrow = arrow(length = unit(0.02, "npc"))) +
  scale_fill_gradient(low="grey90", high="black") +
  scale_color_manual(values = eco.col, labels = c("agriculture" = "Cropland", "meadow" = "Old-Field", "forest" = "Forest")) +
  scale_shape_manual(values=c(agriculture=22, meadow=21, forest=24), labels = c("agriculture" = "Cropland", "meadow" = "Old-Field", "forest" = "Forest")) +
  labs(x="NMDS1", y="NMDS2", fill="Day", shape="Land-use", color="Land-use", title="Weighted UniFrac distance") +
  theme_bw() +
  theme(axis.title=element_text(size=14),
        axis.text=element_text(size=12),
        legend.title=element_text(size=14),
        legend.text=element_text(size=12),
        legend.background = element_blank())

unfrac_wUF.ord.plot

Plot all together

unfrac.ord.leg = g_legend(unfrac_BC.ord.plot + theme(legend.title=element_text(size=14),
                                                     legend.text=element_text(size=12),
                                                     legend.background = element_blank(),
                                                     legend.box = "horizontal"))

beta_ords.plot = cowplot::plot_grid(unfrac_BC.ord.plot + theme(legend.position = "none", title = element_blank()), 
                                    unfrac_uwUF.ord.plot + theme(legend.position = "none", title = element_blank()), 
                                    unfrac_wUF.ord.plot+ theme(legend.position = "none", title = element_blank()),
                                    unfrac.ord.leg, nrow=2, labels = c("A", "B", "C", ""))
beta_ords.plot

#ggsave(beta_ords.plot, filename = "/Users/sambarnett/Documents/Dissertation/figures/fig2_2.tiff", 
#       device = "tiff", width = 7, height = 7, units = "in")

Plot for publication

# Get centroids for plotting the paths
unfrac_meta.df = data.frame(sample_data(unfrac.physeq)) %>%
  mutate(treatment = ifelse(substrate == "H2O-Con", "Water", "Carbon")) %>%
  mutate(group = paste(ecosystem, day, treatment, sep="-"))

prevday.df = data.frame(day=c(0,1,3,6,14,30),
                        prev_day=c(NA,0,1,3,6,14))

unfrac_BC.centroids = data.frame(envfit(unfrac_BC.ord ~ group, data=unfrac_meta.df)$factors$centroids) %>%
  tibble::rownames_to_column(var="group") %>%
  mutate(group = gsub("group", "", group)) %>%
  left_join(unique(select(unfrac_meta.df, ecosystem, day, treatment, group)), by="group") %>%
  left_join(prevday.df, by="day") %>%
  as.data.frame
unfrac_BC.previous.centroids = unfrac_BC.centroids %>%
  select(ecosystem, treatment, NMDS1, NMDS2, day) %>%
  rename(NMDS1_prev = NMDS1, NMDS2_prev = NMDS2, prev_day = day)
unfrac_BC.centroids = left_join(unfrac_BC.centroids, unfrac_BC.previous.centroids, by = c("ecosystem", "treatment", "prev_day")) %>%
  filter(day != 0, treatment == "Carbon") %>%
  mutate(ecosystem = factor(ecosystem, levels = c("agriculture", "meadow", "forest")))


unfrac_BC.ord.plot = ggplot(data=unfrac_BC.ord.df, aes(x=MDS1, y=MDS2)) +
  geom_point(data=unfrac_BC.ord.df[unfrac_BC.ord.df$substrate == "H2O-Con" & unfrac_BC.ord.df$ecosystem == "agriculture",], 
             shape=15, color="blue", size=2) +
  geom_point(data=unfrac_BC.ord.df[unfrac_BC.ord.df$substrate == "H2O-Con" & unfrac_BC.ord.df$ecosystem == "meadow",], 
             shape=16, color="blue", size=2) +
  geom_point(data=unfrac_BC.ord.df[unfrac_BC.ord.df$substrate == "H2O-Con" & unfrac_BC.ord.df$ecosystem == "forest",], 
             shape=17, color="blue", size=2) +
  geom_point(data=unfrac_BC.ord.df[unfrac_BC.ord.df$substrate != "H2O-Con",], 
             aes(fill=day, shape=ecosystem), size=2) +
  geom_segment(data=unfrac_BC.centroids, aes(x=NMDS1_prev, xend=NMDS1, y=NMDS2_prev, yend=NMDS2), color="black",
               size=1, arrow = arrow(length = unit(0.02, "npc"))) +
  geom_segment(data=unfrac_BC.centroids, aes(x=NMDS1_prev, xend=NMDS1, y=NMDS2_prev, yend=NMDS2, color=ecosystem),
               size=0.5, arrow = arrow(length = unit(0.02, "npc"))) +
  scale_fill_gradient(low="grey90", high="black") +
  scale_color_manual(values = eco.col, labels = c("agriculture" = "Cropland", "meadow" = "Old-Field", "forest" = "Forest")) +
  scale_shape_manual(values=c(agriculture=22, meadow=21, forest=24), labels = c("agriculture" = "Cropland", "meadow" = "Old-Field", "forest" = "Forest")) +
  labs(x="NMDS1", y="NMDS2", fill="Day", shape="Land-use", color="Land-use", title="Bray-Curtis dissimilarity") +
  theme_bw() +
  theme(axis.title=element_text(size=7),
        axis.text=element_text(size=6),
        axis.ticks = element_line(size=0.2),
        legend.title=element_text(size=7),
        legend.text=element_text(size=6),
        legend.background = element_blank())

unfrac_BC.ord.plot + 
  theme(axis.title=element_blank(),
        plot.title = element_blank(),
        legend.position = "none")

# Get centroids for plotting the paths
unfrac_meta.df = data.frame(sample_data(unfrac.physeq)) %>%
  mutate(treatment = ifelse(substrate == "H2O-Con", "Water", "Carbon")) %>%
  mutate(group = paste(ecosystem, day, treatment, sep="-"))

prevday.df = data.frame(day=c(0,1,3,6,14,30),
                        prev_day=c(NA,0,1,3,6,14))

unfrac_uwUF.centroids = data.frame(envfit(unfrac_uwUF.ord ~ group, data=unfrac_meta.df)$factors$centroids) %>%
  tibble::rownames_to_column(var="group") %>%
  mutate(group = gsub("group", "", group)) %>%
  left_join(unique(select(unfrac_meta.df, ecosystem, day, treatment, group)), by="group") %>%
  left_join(prevday.df, by="day") %>%
  as.data.frame
unfrac_uwUF.previous.centroids = unfrac_uwUF.centroids %>%
  select(ecosystem, treatment, NMDS1, NMDS2, day) %>%
  rename(NMDS1_prev = NMDS1, NMDS2_prev = NMDS2, prev_day = day)
unfrac_uwUF.centroids = left_join(unfrac_uwUF.centroids, unfrac_uwUF.previous.centroids, by = c("ecosystem", "treatment", "prev_day")) %>%
  filter(day != 0, treatment == "Carbon") %>%
  mutate(ecosystem = factor(ecosystem, levels = c("agriculture", "meadow", "forest")))


unfrac_uwUF.ord.plot = ggplot(data=unfrac_uwUF.ord.df, aes(x=MDS1, y=MDS2)) +
  geom_point(data=unfrac_uwUF.ord.df[unfrac_uwUF.ord.df$substrate == "H2O-Con" & unfrac_uwUF.ord.df$ecosystem == "agriculture",], 
             shape=15, color="blue", size=2) +
  geom_point(data=unfrac_uwUF.ord.df[unfrac_uwUF.ord.df$substrate == "H2O-Con" & unfrac_uwUF.ord.df$ecosystem == "meadow",], 
             shape=16, color="blue", size=2) +
  geom_point(data=unfrac_uwUF.ord.df[unfrac_uwUF.ord.df$substrate == "H2O-Con" & unfrac_uwUF.ord.df$ecosystem == "forest",], 
             shape=17, color="blue", size=2) +
  geom_point(data=unfrac_uwUF.ord.df[unfrac_uwUF.ord.df$substrate != "H2O-Con",], 
             aes(fill=day, shape=ecosystem), size=2) +
  geom_segment(data=unfrac_uwUF.centroids, aes(x=NMDS1_prev, xend=NMDS1, y=NMDS2_prev, yend=NMDS2), color="black",
               size=1, arrow = arrow(length = unit(0.02, "npc"))) +
  geom_segment(data=unfrac_uwUF.centroids, aes(x=NMDS1_prev, xend=NMDS1, y=NMDS2_prev, yend=NMDS2, color=ecosystem),
               size=0.5, arrow = arrow(length = unit(0.02, "npc"))) +
  scale_fill_gradient(low="grey90", high="black") +
  scale_color_manual(values = eco.col, labels = c("agriculture" = "Cropland", "meadow" = "Old-Field", "forest" = "Forest")) +
  scale_shape_manual(values=c(agriculture=22, meadow=21, forest=24), labels = c("agriculture" = "Cropland", "meadow" = "Old-Field", "forest" = "Forest")) +
  labs(x="NMDS1", y="NMDS2", fill="Day", shape="Land-use", color="Land-use", title="Unweighted UniFrac distance") +
  theme_bw() +
  theme(axis.title=element_text(size=7),
        axis.text=element_text(size=6),
        axis.ticks = element_line(size=0.2),
        legend.title=element_text(size=7),
        legend.text=element_text(size=6),
        legend.background = element_blank())

unfrac_uwUF.ord.plot + 
  theme(axis.title=element_blank(),
        plot.title = element_blank(),
        legend.position = "none")

# Get centroids for plotting the paths
unfrac_meta.df = data.frame(sample_data(unfrac.physeq)) %>%
  mutate(treatment = ifelse(substrate == "H2O-Con", "Water", "Carbon")) %>%
  mutate(group = paste(ecosystem, day, treatment, sep="-"))

prevday.df = data.frame(day=c(0,1,3,6,14,30),
                        prev_day=c(NA,0,1,3,6,14))

unfrac_wUF.centroids = data.frame(envfit(unfrac_wUF.ord ~ group, data=unfrac_meta.df)$factors$centroids) %>%
  tibble::rownames_to_column(var="group") %>%
  mutate(group = gsub("group", "", group)) %>%
  left_join(unique(select(unfrac_meta.df, ecosystem, day, treatment, group)), by="group") %>%
  left_join(prevday.df, by="day") %>%
  as.data.frame
unfrac_wUF.previous.centroids = unfrac_wUF.centroids %>%
  select(ecosystem, treatment, NMDS1, NMDS2, day) %>%
  rename(NMDS1_prev = NMDS1, NMDS2_prev = NMDS2, prev_day = day)
unfrac_wUF.centroids = left_join(unfrac_wUF.centroids, unfrac_wUF.previous.centroids, by = c("ecosystem", "treatment", "prev_day")) %>%
  filter(day != 0, treatment == "Carbon") %>%
  mutate(ecosystem = factor(ecosystem, levels = c("agriculture", "meadow", "forest")))

unfrac_wUF.ord.plot = ggplot(data=unfrac_wUF.ord.df, aes(x=MDS1, y=MDS2)) +
  geom_point(data=unfrac_wUF.ord.df[unfrac_wUF.ord.df$substrate == "H2O-Con" & unfrac_wUF.ord.df$ecosystem == "agriculture",], 
             shape=15, color="blue", size=2) +
  geom_point(data=unfrac_wUF.ord.df[unfrac_wUF.ord.df$substrate == "H2O-Con" & unfrac_wUF.ord.df$ecosystem == "meadow",], 
             shape=16, color="blue", size=2) +
  geom_point(data=unfrac_wUF.ord.df[unfrac_wUF.ord.df$substrate == "H2O-Con" & unfrac_wUF.ord.df$ecosystem == "forest",], 
             shape=17, color="blue", size=2) +
  geom_point(data=unfrac_wUF.ord.df[unfrac_wUF.ord.df$substrate != "H2O-Con",], 
             aes(fill=day, shape=ecosystem), size=2) +
  geom_segment(data=unfrac_wUF.centroids, aes(x=NMDS1_prev, xend=NMDS1, y=NMDS2_prev, yend=NMDS2), color="black",
               size=1, arrow = arrow(length = unit(0.02, "npc"))) +
  geom_segment(data=unfrac_wUF.centroids, aes(x=NMDS1_prev, xend=NMDS1, y=NMDS2_prev, yend=NMDS2, color=ecosystem),
               size=0.5, arrow = arrow(length = unit(0.02, "npc"))) +
  scale_fill_gradient(low="grey90", high="black") +
  scale_color_manual(values = eco.col, labels = c("agriculture" = "Cropland", "meadow" = "Old-Field", "forest" = "Forest")) +
  scale_shape_manual(values=c(agriculture=22, meadow=21, forest=24), labels = c("agriculture" = "Cropland", "meadow" = "Old-Field", "forest" = "Forest")) +
  labs(x="NMDS1", y="NMDS2", fill="Day", shape="Land-use", color="Land-use", title="Weighted UniFrac distance") +
  theme_bw() +
  theme(axis.title=element_text(size=7),
        axis.text=element_text(size=6),
        axis.ticks = element_line(size=0.2),
        legend.title=element_text(size=7),
        legend.text=element_text(size=6),
        legend.background = element_blank())

unfrac_wUF.ord.plot + 
  theme(axis.title=element_blank(),
        plot.title = element_blank(),
        legend.position = "none")

Plot all together

library(grid)
library(gridExtra)

unfrac.ord.leg = g_legend(unfrac_BC.ord.plot + labs(fill="Day", shape="Land-use", color="Land-use") +
                            theme(legend.title=element_text(size=7, hjust=0.5),
                                  legend.text=element_text(size=6),
                                  legend.background = element_blank(),
                                  legend.position = "top") +
                            guides(fill = guide_colourbar(barheight = 0.5, title.position="top"),
                                   color = guide_legend(title.position="top"),
                                   shape = guide_legend(title.position="top")))

beta_ords.plot = cowplot::plot_grid(unfrac_BC.ord.plot + theme(legend.position = "none", title = element_blank(), axis.title=element_blank()) + 
                                      scale_y_continuous(breaks=c(-0.2, -0.1, 0, 0.1, 0.2)),
                                    unfrac_uwUF.ord.plot + theme(legend.position = "none", title = element_blank(), axis.title=element_blank()), 
                                    unfrac_wUF.ord.plot+ theme(legend.position = "none", title = element_blank(), axis.title=element_blank()),
                                    nrow=1, labels = c("a", "b", "c"), label_size = 10)
y.grob <- textGrob("NMDS2", gp=gpar(col="black", fontsize=7), rot=90)
x.grob <- textGrob("NMDS1", gp=gpar(col="black", fontsize=7))
beta_ords.plot = grid.arrange(arrangeGrob(beta_ords.plot, left = y.grob, bottom = x.grob))

beta_ords_leg.plot = cowplot::plot_grid(beta_ords.plot, unfrac.ord.leg, nrow=2, rel_heights = c(1, 0.2))
beta_ords_leg.plot

PERMANOVA

# Get metadata without H2O-Con samples
meta_noH2O.df = data.frame(sample_data(unfrac.rare.physeq)) %>%
  select(X.Sample, ecosystem, substrate, day) %>%
  filter(substrate != "H2O-Con") %>%
  mutate(X.Sample = as.character(X.Sample))
rownames(meta_noH2O.df) = meta_noH2O.df$X.Sample

# Remove the H2O-Con samples from the distance matrices
unfrac_BC_noH2O.dist = dist(as.matrix(unfrac_BC.dist)[meta_noH2O.df$X.Sample, meta_noH2O.df$X.Sample])
unfrac_uwUF_noH2O.dist = dist(as.matrix(unfrac_uwUF.dist)[meta_noH2O.df$X.Sample, meta_noH2O.df$X.Sample])
unfrac_wUF_noH2O.dist = dist(as.matrix(unfrac_wUF.dist)[meta_noH2O.df$X.Sample, meta_noH2O.df$X.Sample])

print("Bray-Curtis")
## [1] "Bray-Curtis"
set.seed(4242)
unfrac_BC_noH2O.perm = adonis(unfrac_BC_noH2O.dist ~ ecosystem*day, data=meta_noH2O.df, permutations = 999)
unfrac_BC_noH2O.perm
## 
## Call:
## adonis(formula = unfrac_BC_noH2O.dist ~ ecosystem * day, data = meta_noH2O.df,      permutations = 999) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##                Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)    
## ecosystem       2    708.70  354.35  961.81 0.91483  0.001 ***
## day             1      4.39    4.39   11.91 0.00567  0.001 ***
## ecosystem:day   2      6.33    3.17    8.59 0.00817  0.001 ***
## Residuals     150     55.26    0.37         0.07134           
## Total         155    774.68                 1.00000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
print("Unweighted UniFrac")
## [1] "Unweighted UniFrac"
set.seed(4242)
unfrac_uwUF_noH2O.perm = adonis(unfrac_uwUF_noH2O.dist ~ ecosystem*day, data=meta_noH2O.df, permutations = 999)
unfrac_uwUF_noH2O.perm
## 
## Call:
## adonis(formula = unfrac_uwUF_noH2O.dist ~ ecosystem * day, data = meta_noH2O.df,      permutations = 999) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##                Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)    
## ecosystem       2   170.745  85.373 309.562 0.79279  0.001 ***
## day             1     1.529   1.529   5.543 0.00710  0.002 ** 
## ecosystem:day   2     1.732   0.866   3.139 0.00804  0.013 *  
## Residuals     150    41.368   0.276         0.19207           
## Total         155   215.373                 1.00000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
print("Weighted UniFrac")
## [1] "Weighted UniFrac"
set.seed(4242)
unfrac_wUF_noH2O.perm = adonis(unfrac_wUF_noH2O.dist ~ ecosystem*day, data=meta_noH2O.df, permutations = 999)
unfrac_wUF_noH2O.perm
## 
## Call:
## adonis(formula = unfrac_wUF_noH2O.dist ~ ecosystem * day, data = meta_noH2O.df,      permutations = 999) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##                Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)    
## ecosystem       2   167.188  83.594  332.73 0.78399  0.001 ***
## day             1     3.765   3.765   14.99 0.01766  0.001 ***
## ecosystem:day   2     4.614   2.307    9.18 0.02164  0.001 ***
## Residuals     150    37.686   0.251         0.17672           
## Total         155   213.253                 1.00000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Resistance/resilience

Based on the ordinations, the community compositions clearly change over time after carbon substrates are added to the microcosm. It further looks as though the degree of compositional change differes across land-use regime where agriculture soils seem to show the greatest change. It also looks as though this change is somewhat reversed over time where the communities on the final day (30) seem to be more similar to the starting day (0) than the earlier timepoints. With this in mind I will look to see if we are seeing resilience or resistance in these communities and how these differ across land use regime. Resistance here is defined by the amount of change in the community after a disturbance, which in this case is carbon addtion. Resilience is the ability of the community to return to its original stable composition after a disturbance.

To examine resistance an resilience, I will compare the dissimilairty/distance in community composition between each day and day 0 to the starting variation between the day 0 replicates. Essentially, this analysis will tell us whether the shift in community composition is greater or less than what might be expected between untreated soil microcosms.

I will run this analysis using Bray-Curtis, unweighted UniFrac, and UniFrac. First I need to collect all paired distances of interest.

# Get paired samples
unfrac_metadata.df = data.frame(sample_data(unfrac.physeq)) %>%
  mutate(treatment = ifelse(substrate == "H2O-Con", "Water", "Carbon"),
         X.Sample = as.character(X.Sample)) %>%
  select(X.Sample, day, ecosystem, treatment, microcosm_replicate)

unfrac_day0.df = unfrac_metadata.df %>%
  filter(day == 0) %>%
  select(X.Sample, ecosystem) %>%
  rename(day0.Sample = X.Sample)

unfrac_daypaired.df = unfrac_metadata.df %>%
  filter(day != 0) %>%
  rename(dayX.Sample = X.Sample) %>%
  left_join(unfrac_day0.df, by = "ecosystem") %>%
  select(-microcosm_replicate)

# Get the distances/dissimilarities between the two day 0 replicates
unfrac_day0_within.df = unfrac_metadata.df %>%
  filter(day == 0) %>%
  select(X.Sample, ecosystem, microcosm_replicate)

unfrac_day0.BC.df = as.matrix(unfrac_BC.dist)[unique(unfrac_day0_within.df[unfrac_day0_within.df$microcosm_replicate==1,]$X.Sample),
                                     as.character(unfrac_day0_within.df[unfrac_day0_within.df$microcosm_replicate==2,]$X.Sample)] %>%
  as.data.frame %>%
  tibble::rownames_to_column(var="X.Sample") %>%
  tidyr::gather(key="day0.Sample", value="Day0_dist", -X.Sample) %>%
  mutate(day0.Sample = gsub("12C.Con", "12C-Con", day0.Sample)) %>%
  inner_join(unfrac_day0_within.df, by = "X.Sample") %>%
  inner_join(unfrac_day0.df, by = c("day0.Sample", "ecosystem")) %>%
  select(ecosystem, Day0_dist) %>%
  mutate(measure="Bray-Curtis")

unfrac_day0.uwUF.df = as.matrix(unfrac_uwUF.dist)[unique(unfrac_day0_within.df[unfrac_day0_within.df$microcosm_replicate==1,]$X.Sample),
                                     as.character(unfrac_day0_within.df[unfrac_day0_within.df$microcosm_replicate==2,]$X.Sample)] %>%
  as.data.frame %>%
  tibble::rownames_to_column(var="X.Sample") %>%
  tidyr::gather(key="day0.Sample", value="Day0_dist", -X.Sample) %>%
  mutate(day0.Sample = gsub("12C.Con", "12C-Con", day0.Sample)) %>%
  inner_join(unfrac_day0_within.df, by = "X.Sample") %>%
  inner_join(unfrac_day0.df, by = c("day0.Sample", "ecosystem")) %>%
  select(ecosystem, Day0_dist) %>%
  mutate(measure="Unweighted UniFrac")

unfrac_day0.wUF.df = as.matrix(unfrac_wUF.dist)[unique(unfrac_day0_within.df[unfrac_day0_within.df$microcosm_replicate==1,]$X.Sample),
                                     as.character(unfrac_day0_within.df[unfrac_day0_within.df$microcosm_replicate==2,]$X.Sample)] %>%
  as.data.frame %>%
  tibble::rownames_to_column(var="X.Sample") %>%
  tidyr::gather(key="day0.Sample", value="Day0_dist", -X.Sample) %>%
  mutate(day0.Sample = gsub("12C.Con", "12C-Con", day0.Sample)) %>%
  inner_join(unfrac_day0_within.df, by = "X.Sample") %>%
  inner_join(unfrac_day0.df, by = c("day0.Sample", "ecosystem")) %>%
  select(ecosystem, Day0_dist) %>%
  mutate(measure="Weighted UniFrac")

unfrac_day0.dist.df = rbind(unfrac_day0.BC.df, unfrac_day0.uwUF.df, unfrac_day0.wUF.df)


# Get distances/dissimilarities between the each day and day 0.
unfrac_daypaired.BC.df = as.matrix(unfrac_BC.dist)[unique(unfrac_daypaired.df$dayX.Sample),
                                     as.character(unfrac_daypaired.df$day0.Sample)] %>%
  as.data.frame %>%
  tibble::rownames_to_column(var="dayX.Sample") %>%
  tidyr::gather(key="day0.Sample", value="distance", -dayX.Sample) %>%
  mutate(day0.Sample = gsub("12C.Con", "12C-Con", day0.Sample)) %>%
  inner_join(unfrac_daypaired.df, by = c("dayX.Sample", "day0.Sample")) %>%
  mutate(measure="Bray-Curtis")

unfrac_daypaired.uwUF.df = as.matrix(unfrac_uwUF.dist)[unique(unfrac_daypaired.df$dayX.Sample),
                                     as.character(unfrac_daypaired.df$day0.Sample)] %>%
  as.data.frame %>%
  tibble::rownames_to_column(var="dayX.Sample") %>%
  tidyr::gather(key="day0.Sample", value="distance", -dayX.Sample) %>%
  mutate(day0.Sample = gsub("12C.Con", "12C-Con", day0.Sample)) %>%
  inner_join(unfrac_daypaired.df, by = c("dayX.Sample", "day0.Sample")) %>%
  mutate(measure="Unweighted UniFrac")

unfrac_daypaired.wUF.df = as.matrix(unfrac_wUF.dist)[unique(unfrac_daypaired.df$dayX.Sample),
                                     as.character(unfrac_daypaired.df$day0.Sample)] %>%
  as.data.frame %>%
  tibble::rownames_to_column(var="dayX.Sample") %>%
  tidyr::gather(key="day0.Sample", value="distance", -dayX.Sample) %>%
  mutate(day0.Sample = gsub("12C.Con", "12C-Con", day0.Sample)) %>%
  inner_join(unfrac_daypaired.df, by = c("dayX.Sample", "day0.Sample")) %>%
  mutate(measure="Weighted UniFrac")

unfrac_daypaired.dist.df = rbind(unfrac_daypaired.BC.df, unfrac_daypaired.uwUF.df, unfrac_daypaired.wUF.df) %>%
  left_join(unfrac_day0.dist.df, by = c("ecosystem", "measure")) %>%
  mutate(ecosystem = factor(ecosystem, levels = c("agriculture", "meadow", "forest")),
         measure = factor(measure, levels = c("Bray-Curtis", "Unweighted UniFrac", "Weighted UniFrac")))

unfrac_daypaired.dist.sum = unfrac_daypaired.dist.df %>%
  group_by(day, ecosystem, treatment, measure) %>%
  summarize(mean_dist = mean(distance),
            sd_dist = sd(distance),
            n_pairs = n()) %>%
  as.data.frame %>%
  mutate(SE_dist = sd_dist/sqrt(n_pairs))

Now I an compare the distances between each day and day 0 to the intra-day 0 distance. I’ll use a single sample t-test and adjust the p-value for multiple comparisons.

# Run analysis separately for each beta-diversity metric
daypair_ttest.df = data.frame()
for (metric in c("Bray-Curtis", "Unweighted UniFrac", "Weighted UniFrac")){
  for (eco in c("agriculture", "meadow", "forest")){
    for (dia in c(1, 3, 6, 14, 30)){
      sub.df = unfrac_daypaired.dist.df %>%
        filter(measure == metric,
               ecosystem == eco,
               treatment == "Carbon",
               day == dia)
      sub.ttest = t.test(sub.df$distance, mu=unique(sub.df$Day0_dist))
      daypair_ttest.df = rbind(daypair_ttest.df, data.frame(ecosystem = eco, day = dia, treatment = "Carbon", measure = metric,
                                                            t = sub.ttest$statistic, df = sub.ttest$parameter, 
                                                            pvalue = sub.ttest$p.value, mean = sub.ttest$estimate, 
                                                            lower.CL = sub.ttest$conf.int[1], upper.CL = sub.ttest$conf.int[2]))
    }
    sub.df = unfrac_daypaired.dist.df %>%
      filter(measure == metric,
             ecosystem == eco,
             treatment == "Water",
             day == 30)
    sub.ttest = t.test(sub.df$distance, mu=unique(sub.df$Day0_dist))
    daypair_ttest.df = rbind(daypair_ttest.df, data.frame(ecosystem = eco, day = 30, treatment = "Water", measure = metric,
                                                          t = sub.ttest$statistic, df = sub.ttest$parameter, 
                                                          pvalue = sub.ttest$p.value, mean = sub.ttest$estimate, 
                                                          lower.CL = sub.ttest$conf.int[1], upper.CL = sub.ttest$conf.int[2]))
  }
}
daypair_ttest.df = daypair_ttest.df %>%
  group_by(measure) %>%
  mutate(padj = p.adjust(pvalue, method="BH")) %>%
  ungroup %>%
  mutate(sig = ifelse(padj < 0.05, "Significant", "Non-significant")) %>%
  mutate(ecosystem = factor(ecosystem, levels = c("agriculture", "meadow", "forest")),
         measure = factor(measure, levels = c("Bray-Curtis", "Unweighted UniFrac", "Weighted UniFrac"))) %>%
  left_join(unfrac_daypaired.dist.sum, by = c("ecosystem", "day", "treatment", "measure"))

daypair_ttest.df
## # A tibble: 54 x 16
##    ecosystem   day treatment measure      t    df   pvalue  mean lower.CL
##    <fct>     <dbl> <chr>     <fct>    <dbl> <dbl>    <dbl> <dbl>    <dbl>
##  1 agricult…     1 Carbon    Bray-C…  2.99     11 1.24e- 2 0.413    0.398
##  2 agricult…     3 Carbon    Bray-C… 16.7      11 3.57e- 9 0.693    0.654
##  3 agricult…     6 Carbon    Bray-C… 30.0      23 6.03e-20 0.643    0.626
##  4 agricult…    14 Carbon    Bray-C… 30.6      27 1.69e-22 0.588    0.575
##  5 agricult…    30 Carbon    Bray-C… 35.1      23 1.73e-21 0.577    0.566
##  6 agricult…    30 Water     Bray-C…  0.916     5 4.02e- 1 0.407    0.368
##  7 meadow        1 Carbon    Bray-C… -0.449    11 6.62e- 1 0.479    0.457
##  8 meadow        3 Carbon    Bray-C… -1.42     11 1.85e- 1 0.471    0.452
##  9 meadow        6 Carbon    Bray-C… -2.27     23 3.26e- 2 0.471    0.459
## 10 meadow       14 Carbon    Bray-C… -2.48     27 1.98e- 2 0.471    0.461
## # … with 44 more rows, and 7 more variables: upper.CL <dbl>, padj <dbl>,
## #   sig <chr>, mean_dist <dbl>, sd_dist <dbl>, n_pairs <int>, SE_dist <dbl>

Now I’ll plot the results.

daypair_ttest.LU.df = left_join(daypair_ttest.df, 
                                data.frame(ecosystem = c("agriculture", "meadow", "forest"),
                                           landuse = factor(c("Cropland", "Old-field", "Forest"), 
                                                            levels=c("Cropland", "Old-field", "Forest"))),
                                by = "ecosystem")

unfrac_daypaired.LU.dist.df = left_join(unfrac_daypaired.dist.df, 
                                        data.frame(ecosystem = c("agriculture", "meadow", "forest"),
                                                   landuse = factor(c("Cropland", "Old-field", "Forest"), 
                                                                    levels=c("Cropland", "Old-field", "Forest"))),
                                        by = "ecosystem")

beta_to0.plot = ggplot(data=daypair_ttest.LU.df, aes(x=day, y=mean)) +
  geom_hline(data=unique(select(unfrac_daypaired.LU.dist.df, landuse, measure, Day0_dist)),
             aes(yintercept=Day0_dist), linetype=2) +
  geom_line(data=daypair_ttest.LU.df[daypair_ttest.LU.df$treatment=="Carbon",]) +
  geom_errorbar(aes(ymin=mean-SE_dist, ymax=mean+SE_dist), width=1) +
  geom_point(aes(fill=sig, shape=treatment), size=3) +
  scale_fill_manual(values = c("Significant" = "black", "Non-significant" = "white")) +
  scale_shape_manual(values = c(Water=24, Carbon = 21)) +
  labs(x="Time since substrate addition (days)", y="Community dissimilarity/distance") +
  theme_bw() +
  theme(legend.position = "none",
        legend.title=element_text(size=14),
        legend.text=element_text(size=12),
        axis.title = element_text(size=14),
        axis.text = element_text(size=12),
        strip.text = element_text(size=10)) +
  facet_grid(measure~landuse, scales = "free_y")

beta_to0.plot

#ggsave(beta_to0.plot, filename = "/Users/sambarnett/Documents/Dissertation/figures/figS2_7.tiff", 
#       device = "tiff", width = 6.69291, height = 6.69291, units = "in")

Convergence/Divergence

Based on the ordinations we can see that the microcosm community compositions cluster very nicely by land-use regime. I’m now curious to see if any of the land-use regimes show convergence or divergence to eachtother. In other words, do any land-use regimes become more (convergence) or less (divergence) similar to eachother after carbon addition?

For this analysis, I’ll measure the dissimilarity/distance between microcosms from different land-use regimes on the same day and compare these values to the mean from day 0. This will effectively show whether the community compositions between two land-use regimes are more or less similar to eachother than initially.

Again, I will run this analysis using Bray-Curtis, unweighted UniFrac, and UniFrac. First I need to collect all paired distances of interest.

# Get paired samples
unfrac_metadata.df = data.frame(sample_data(unfrac.physeq)) %>%
  mutate(treatment = ifelse(substrate == "H2O-Con", "Water", "Carbon"),
         X.Sample = as.character(X.Sample)) %>%
  select(X.Sample, day, ecosystem, treatment)

unfrac_Ag_metadata.df = unfrac_metadata.df %>%
  filter(ecosystem == "agriculture") %>%
  rename(Sample1 = X.Sample, ecosystem_1 = ecosystem)
unfrac_M1_metadata.df = unfrac_metadata.df %>%
  filter(ecosystem == "meadow") %>%
  rename(Sample1 = X.Sample, ecosystem_1 = ecosystem)
unfrac_M2_metadata.df = unfrac_metadata.df %>%
  filter(ecosystem == "meadow") %>%
  rename(Sample2 = X.Sample, ecosystem_2 = ecosystem)
unfrac_F_metadata.df = unfrac_metadata.df %>%
  filter(ecosystem == "forest") %>%
  rename(Sample2 = X.Sample, ecosystem_2 = ecosystem)

unfrac_ecopaired.df = rbind(full_join(unfrac_Ag_metadata.df, unfrac_M2_metadata.df, by = c("day", "treatment")),
                            full_join(unfrac_Ag_metadata.df, unfrac_F_metadata.df, by = c("day", "treatment")),
                            full_join(unfrac_M1_metadata.df, unfrac_F_metadata.df, by = c("day", "treatment")))

# Get the distances/dissimilarities between the land-use regimes
unfrac_ecopaired.BC.df = data.frame(as.matrix(unfrac_BC.dist)) %>%
  tibble::rownames_to_column(var="Sample1") %>%
  tidyr::gather(key="Sample2", value="distance", -Sample1) %>%
  mutate(Sample2 = gsub("13C.", "13C-", gsub("12C.", "12C-", gsub("H2O.", "H2O-", Sample2)))) %>%
  inner_join(unfrac_ecopaired.df, by = c("Sample1", "Sample2")) %>%
  mutate(measure="Bray-Curtis")

unfrac_ecopaired.uwUF.df = data.frame(as.matrix(unfrac_uwUF.dist)) %>%
  tibble::rownames_to_column(var="Sample1") %>%
  tidyr::gather(key="Sample2", value="distance", -Sample1) %>%
  mutate(Sample2 = gsub("13C.", "13C-", gsub("12C.", "12C-", gsub("H2O.", "H2O-", Sample2)))) %>%
  inner_join(unfrac_ecopaired.df, by = c("Sample1", "Sample2")) %>%
  mutate(measure="Unweighted UniFrac")

unfrac_ecopaired.wUF.df = data.frame(as.matrix(unfrac_wUF.dist)) %>%
  tibble::rownames_to_column(var="Sample1") %>%
  tidyr::gather(key="Sample2", value="distance", -Sample1) %>%
  mutate(Sample2 = gsub("13C.", "13C-", gsub("12C.", "12C-", gsub("H2O.", "H2O-", Sample2)))) %>%
  inner_join(unfrac_ecopaired.df, by = c("Sample1", "Sample2")) %>%
  mutate(measure="Weighted UniFrac")

unfrac_ecopaired.dist.df = rbind(unfrac_ecopaired.BC.df, unfrac_ecopaired.uwUF.df, unfrac_ecopaired.wUF.df) %>%
  mutate(eco_pair = factor(paste(ecosystem_1, ecosystem_2, sep="-"),
                           levels = c("agriculture-meadow", "agriculture-forest", "meadow-forest")),
         measure = factor(measure, levels = c("Bray-Curtis", "Unweighted UniFrac", "Weighted UniFrac")))

unfrac_ecopaired.dist.sum = unfrac_ecopaired.dist.df %>%
  group_by(eco_pair, day, treatment, measure) %>%
  summarize(mean_dist = mean(distance),
            sd_dist = sd(distance),
            n_pairs = n()) %>%
  as.data.frame %>%
  mutate(SE_dist = sd_dist/sqrt(n_pairs))

Now I an compare the distances between land use regimes on each day to their day 0 distance. I’ll use a two sample t-test and adjust the p-value for multiple comparisons.

# Run analysis separately for each beta-diversity metric
ecopair_ttest.df = data.frame()
eco_pair = NULL
for (metric in c("Bray-Curtis", "Unweighted UniFrac", "Weighted UniFrac")){
  for (pair in c("agriculture-meadow", "agriculture-forest", "meadow-forest")){
    subday0.df = unfrac_ecopaired.dist.df %>%
      filter(measure == metric,
             eco_pair == pair,
             day == 0)
    for (dia in c(1, 3, 6, 14, 30)){
      sub.df = unfrac_ecopaired.dist.df %>%
        filter(measure == metric,
               eco_pair == pair,
               treatment == "Carbon",
               day == dia)
      sub.ttest = t.test(x=sub.df$distance, y=subday0.df$distance)
      ecopair_ttest.df = rbind(ecopair_ttest.df, data.frame(eco_pair = pair, day = dia, treatment = "Carbon", measure = metric,
                                                            t = sub.ttest$statistic, df = sub.ttest$parameter, 
                                                            pvalue = sub.ttest$p.value, mean = sub.ttest$estimate[1], 
                                                            mean_day0 = sub.ttest$estimate[2],
                                                            lower.CL = sub.ttest$conf.int[1], upper.CL = sub.ttest$conf.int[2]))
    }
    sub.df = unfrac_ecopaired.dist.df %>%
      filter(measure == metric,
             eco_pair == pair,
             treatment == "Water",
             day == 30)
    sub.ttest = t.test(x=sub.df$distance, y=subday0.df$distance)
    ecopair_ttest.df = rbind(ecopair_ttest.df, data.frame(eco_pair = pair, day = 30, treatment = "Water", measure = metric,
                                                          t = sub.ttest$statistic, df = sub.ttest$parameter, 
                                                          pvalue = sub.ttest$p.value, mean = sub.ttest$estimate[1],
                                                          mean_day0 = sub.ttest$estimate[2],
                                                          lower.CL = sub.ttest$conf.int[1], upper.CL = sub.ttest$conf.int[2]))
  }
}
ecopair_ttest.df = ecopair_ttest.df %>%
  group_by(measure) %>%
  mutate(padj = p.adjust(pvalue, method="BH")) %>%
  ungroup %>%
  mutate(sig = ifelse(padj < 0.05, "Significant", "Non-significant"),
         eco_pair = factor(eco_pair, levels = c("agriculture-meadow", "agriculture-forest", "meadow-forest")),
         measure = factor(measure, levels = c("Bray-Curtis", "Unweighted UniFrac", "Weighted UniFrac"))) %>%
  left_join(unfrac_ecopaired.dist.sum, by = c("eco_pair", "day", "treatment", "measure"))

ecopair_ttest.df
## # A tibble: 54 x 17
##    eco_pair   day treatment measure      t    df  pvalue  mean mean_day0
##    <fct>    <dbl> <chr>     <fct>    <dbl> <dbl>   <dbl> <dbl>     <dbl>
##  1 agricul…     1 Carbon    Bray-C…  0.400  3.29 7.13e-1 0.725     0.721
##  2 agricul…     3 Carbon    Bray-C… 10.9    4.47 2.18e-4 0.833     0.721
##  3 agricul…     6 Carbon    Bray-C…  9.14   3.20 2.13e-3 0.807     0.721
##  4 agricul…    14 Carbon    Bray-C…  6.60   3.10 6.36e-3 0.783     0.721
##  5 agricul…    30 Carbon    Bray-C…  4.62   3.11 1.77e-2 0.764     0.721
##  6 agricul…    30 Water     Bray-C… -2.30   5.93 6.13e-2 0.696     0.721
##  7 agricul…     1 Carbon    Bray-C… -1.95   4.05 1.23e-1 0.680     0.696
##  8 agricul…     3 Carbon    Bray-C…  2.76  10.2  1.99e-2 0.725     0.696
##  9 agricul…     6 Carbon    Bray-C…  1.23   4.40 2.82e-1 0.706     0.696
## 10 agricul…    14 Carbon    Bray-C… -1.19   3.54 3.07e-1 0.687     0.696
## # … with 44 more rows, and 8 more variables: lower.CL <dbl>, upper.CL <dbl>,
## #   padj <dbl>, sig <chr>, mean_dist <dbl>, sd_dist <dbl>, n_pairs <int>,
## #   SE_dist <dbl>

Now I’ll plot the results.

ggplot(data=ecopair_ttest.df, aes(x=day, y=mean)) +
  #geom_hline(data=unique(select(ecopair_ttest.df, eco_pair, measure, mean_day0)),
            # aes(yintercept=mean_day0), linetype=2) +
  geom_hline(data=unfrac_ecopaired.dist.sum[unfrac_ecopaired.dist.sum$day == 0,],
             aes(yintercept=mean_dist), linetype=2) +
  geom_hline(data=unfrac_ecopaired.dist.sum[unfrac_ecopaired.dist.sum$day == 0,],
             aes(yintercept=mean_dist-SE_dist), linetype=3) +
  geom_hline(data=unfrac_ecopaired.dist.sum[unfrac_ecopaired.dist.sum$day == 0,],
             aes(yintercept=mean_dist+SE_dist), linetype=3) +
  geom_line(data=ecopair_ttest.df[ecopair_ttest.df$treatment=="Carbon",]) +
  geom_errorbar(aes(ymin=mean-SE_dist, ymax=mean+SE_dist), width=1) +
  geom_point(aes(fill=sig, shape=treatment), size=3) +
  scale_fill_manual(values = c("Significant" = "black", "Non-significant" = "white")) +
  scale_shape_manual(values = c(Water=24, Carbon = 21)) +
  labs(x="Time since substrate addition (days)", y="Community dissimilarity/distance") +
  theme_bw() +
  theme(legend.position = "none",
        legend.title=element_text(size=14),
        legend.text=element_text(size=12),
        axis.title = element_text(size=14),
        axis.text = element_text(size=12),
        strip.text = element_text(size=10)) +
  facet_grid(measure~eco_pair, scales = "free_y")

Abundance of Taxa

We see that across the board, agriculture seems to show the greatest shift in diversity after substrate addition. Now I want to see if we can visualize this change with regards to the taxonomy and abundance of the OTUs. So, here I want to make a figure showing the abundance of various taxa across each land use regime over time.

# Get OTU relative abundances
OTU_table.df = data.frame(otu_table(unfrac.rare.physeq)) %>%
  tibble::rownames_to_column(var="OTU") %>%
  tidyr::gather(key="X.Sample", value="abundance", -OTU) %>%
  mutate(X.Sample = gsub("13C.", "13C-", gsub("12C.", "12C-", gsub("H2O.", "H2O-", X.Sample)))) %>%
  group_by(X.Sample) %>%
  mutate(total_count = sum(abundance)) %>%
  ungroup %>%
  mutate(abundance = abundance/total_count*100) %>%
  filter(abundance > 0)

# Add in taxonomy information
taxonomy.df = data.frame(tax_table(unfrac.rare.physeq), stringsAsFactors = FALSE) %>%
  tibble::rownames_to_column(var="OTU")

# Group taxa that have less than 1% abundance in any sample
OTU_table.taxa.df = left_join(OTU_table.df, taxonomy.df, by = "OTU") %>%
  mutate(taxa = ifelse(Phylum == "Proteobacteria", as.character(Class), as.character(Phylum))) %>%
  group_by(taxa, X.Sample) %>%
  mutate(taxa_abd = sum(abundance)) %>%
  ungroup %>%
  group_by(taxa) %>%
  mutate(max_taxa_abd = max(taxa_abd)) %>%
  ungroup %>%
  mutate(taxa = ifelse(max_taxa_abd < 1, "Less than 1%", taxa))

# Add in the sample metadata
unfrac_metadata.df = data.frame(sample_data(unfrac.rare.physeq)) %>%
  select(X.Sample, ecosystem, day, substrate, microcosm_replicate) %>%
  left_join(data.frame(ecosystem = c("agriculture", "meadow", "forest"), 
                       landuse = factor(c("Cropland", "Old-field", "Forest"), levels=c("Cropland", "Old-field", "Forest"))),
            by = "ecosystem") %>%
  mutate(ecosystem = factor(ecosystem, levels=c("agriculture", "meadow", "forest")),
         substrate = factor(substrate, levels=c("H2O-Con", "12C-Con", "13C-Xyl", "13C-Ami", "13C-Van", "13C-Cel", "13C-Pal")),
         day_factor = factor(ifelse(substrate == "H2O-Con", "H2O-Con", day), c(0, 1, 3, 6, 14, 30, "H2O-Con")))

OTU_table.taxa.meta.df = left_join(OTU_table.taxa.df, unfrac_metadata.df, by = "X.Sample") %>%
  arrange(day_factor, substrate, ecosystem, microcosm_replicate)
OTU_table.taxa.meta.df$X.Sample = factor(OTU_table.taxa.meta.df$X.Sample, levels=unique(OTU_table.taxa.meta.df$X.Sample))

Now I’ll plot these abundances for my dissertation.

# Set colors for the taxa
source("/Users/sambarnett/Documents/Misc_code/paul_tol_colors.R")
taxa = unique(OTU_table.taxa.meta.df[OTU_table.taxa.meta.df$taxa != "Less than 1%",]$taxa)
taxa.cols = c(paultol_colors(length(taxa)), "#777777")
names(taxa.cols) = c(sort(taxa), "Less than 1%")

# Get summary abundance of each taxa
OTU_table.taxa.meta.sum = OTU_table.taxa.meta.df %>%
  group_by(X.Sample, day_factor, substrate, landuse, microcosm_replicate, taxa) %>%
  summarize(taxa_abd = sum(abundance)) %>%
  as.data.frame

# Get positions for each replicate in the plot. Separating bars by their day with separate bars per replicate
day.shift = data.frame(day_factor = c(0, 1, 3, 6, 14, 30, "H2O-Con"),
                       shift = c(0, 3, 6, 9, 12, 15, 20))
plot.positions = OTU_table.taxa.meta.sum %>%
  select(X.Sample, landuse, day_factor, substrate, microcosm_replicate) %>%
  unique %>%
  arrange(day_factor, substrate, microcosm_replicate) %>%
  group_by(landuse) %>%
  mutate(rank = row_number()) %>%
  ungroup %>%
  left_join(day.shift) %>%
  mutate(position = rank + shift) %>%
  select(X.Sample, position)

OTU_table.taxa.meta.sum = left_join(OTU_table.taxa.meta.sum, plot.positions, by="X.Sample") %>%
  mutate(taxa = factor(taxa, levels=names(taxa.cols)))

plot.xlab = OTU_table.taxa.meta.sum %>%
  select(day_factor, position, landuse) %>%
  unique %>%
  mutate(label = ifelse(day_factor == "H2O-Con", "w", as.character(day_factor))) %>%
  group_by(label, landuse) %>%
  summarize(midpoint = mean(position)) %>%
  as.data.frame

# Plot
taxa_abund.plot = ggplot(data=OTU_table.taxa.meta.sum, aes(x=position, y=taxa_abd)) +
  geom_bar(stat="identity", aes(fill=taxa, color=taxa)) +
  geom_text(data=plot.xlab, aes(x=midpoint, label=label), y=-3, size=4.5) +
  scale_y_continuous(limits=c(-5, 100), expand = c(0, 1)) +
  labs(x="Time since substrate addition (days)", y="Relative abundance (% of reads)",
       fill="Phylum/Class", color="Phylum/Class") +
  scale_fill_manual(values=taxa.cols) +
  scale_color_manual(values=taxa.cols) +
  theme_bw() +
  theme(panel.border = element_blank(),
        panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        legend.position = "right",
        legend.title=element_text(size=14),
        legend.text=element_text(size=12),
        axis.title = element_text(size=14),
        axis.text.x = element_blank(),
        axis.text.y = element_text(size=12),
        axis.ticks.x = element_blank(),
        strip.text = element_text(size=10)) +
  facet_wrap(~landuse)

taxa_abund.plot

#ggsave(taxa_abund.plot, filename = "/Users/sambarnett/Documents/Dissertation/figures/figS2_14.tiff", 
#       device = "tiff", width = 6.69291, height = 4.72441, units = "in")

Now plot for publication.

# Set colors for the taxa
source("/Users/sambarnett/Documents/Misc_code/paul_tol_colors.R")
taxa = unique(OTU_table.taxa.meta.df[OTU_table.taxa.meta.df$taxa != "Less than 1%",]$taxa)
taxa.cols = c(paultol_colors(length(taxa)), "#777777")
names(taxa.cols) = c(sort(taxa), "Less than 1%")

# Get summary abundance of each taxa
OTU_table.taxa.meta.sum = OTU_table.taxa.meta.df %>%
  group_by(X.Sample, day_factor, substrate, landuse, microcosm_replicate, taxa) %>%
  summarize(taxa_abd = sum(abundance)) %>%
  as.data.frame

# Get positions for each replicate in the plot. Separating bars by their day with separate bars per replicate
day.shift = data.frame(day_factor = c(0, 1, 3, 6, 14, 30, "H2O-Con"),
                       shift = c(0, 3, 6, 9, 12, 15, 20))
plot.positions = OTU_table.taxa.meta.sum %>%
  select(X.Sample, landuse, day_factor, substrate, microcosm_replicate) %>%
  unique %>%
  arrange(day_factor, substrate, microcosm_replicate) %>%
  group_by(landuse) %>%
  mutate(rank = row_number()) %>%
  ungroup %>%
  left_join(day.shift) %>%
  mutate(position = rank + shift) %>%
  select(X.Sample, position)

OTU_table.taxa.meta.sum = left_join(OTU_table.taxa.meta.sum, plot.positions, by="X.Sample") %>%
  mutate(taxa = factor(taxa, levels=names(taxa.cols)))

plot.xlab = OTU_table.taxa.meta.sum %>%
  select(day_factor, position, landuse) %>%
  unique %>%
  mutate(label = ifelse(day_factor == "H2O-Con", "w", as.character(day_factor))) %>%
  group_by(label, landuse) %>%
  summarize(midpoint = mean(position)) %>%
  as.data.frame

# Plot
taxa_abund.plot = ggplot(data=OTU_table.taxa.meta.sum, aes(x=position, y=taxa_abd)) +
  geom_bar(stat="identity", aes(fill=taxa, color=taxa)) +
  geom_text(data=plot.xlab, aes(x=midpoint, label=label), y=-2, size=(6*5/14)) +
  scale_y_continuous(limits=c(-2.5, 100), expand = c(0, 1)) +
  labs(x="Time since substrate addition (days)", y="Relative abundance (% of reads)",
       fill="Phylum/Class", color="Phylum/Class") +
  scale_fill_manual(values=taxa.cols) +
  scale_color_manual(values=taxa.cols) +
  theme_bw() +
  theme(panel.border = element_blank(),
        panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        axis.ticks.y = element_line(size=0.2),
        legend.position = "right",
        legend.title=element_text(size=7),
        legend.text=element_text(size=6),
        axis.title = element_text(size=7),
        axis.text.x = element_blank(),
        axis.text.y = element_text(size=6),
        axis.ticks.x = element_blank(),
        strip.text = element_text(size=6)) +
  facet_wrap(~landuse)
beta_ords_taxa_abund.plot = cowplot::plot_grid(beta_ords_leg.plot, taxa_abund.plot, nrow=2, rel_heights = c(0.7, 1), labels = c("", "d"), label_size = 10)
beta_ords_taxa_abund.plot

#ggsave(beta_ords_taxa_abund.plot, filename = "/Users/sambarnett/Documents/Buckley Lab/FullCyc2/manuscript/Figures/Fig2.tiff", 
#       device = "tiff", width = 7.08661, height = 7.08661, units = "in")

Lets plot the change in abundance of the top5 most abundant OTUs in each land use

library(grid)
library(gridExtra)

Top5OTUs.df = OTU_table.taxa.meta.df %>%
  filter(substrate != "H2O-Con") %>%
  group_by(OTU, day_factor, landuse, Domain, Phylum, Class, Order, Family, Genus, Species) %>%
  summarize(mean_abd = mean(abundance)) %>%
  as.data.frame %>%
  group_by(OTU, landuse, Domain, Phylum, Class, Order, Family, Genus, Species) %>%
  summarize(max_abd = max(mean_abd)) %>%
  as.data.frame %>%
  arrange(-max_abd) %>%
  group_by(landuse) %>%
  mutate(rank = row_number()) %>%
  ungroup %>%
  filter(rank <= 5) %>%
  as.data.frame %>%
  arrange(landuse, rank) %>%
  mutate(taxa = ifelse(!(is.na(Genus)) & !(grepl("uncultured", Genus)) & !(grepl("Ambiguous", Genus)), Genus,
                       ifelse(!(is.na(Family)) & !(grepl("uncultured", Family)), Family,
                              ifelse(!(is.na(Order)) & !(grepl("uncultured", Order)), Order, Class)))) %>%
  mutate(taxa_level = ifelse(!(is.na(Genus)) & !(grepl("uncultured", Genus)) & !(grepl("Ambiguous", Genus)), "Genus", 
                             ifelse(!(is.na(Family)) & !(grepl("uncultured", Family)), "Family",
                                    ifelse(!(is.na(Order)) & !(grepl("uncultured", Order)), "Order", "Class")))) %>%
  mutate(taxa = gsub("Burkholderia-Caballeronia-Paraburkholderia", "B-C-P", taxa),
         taxa = gsub("Candidatus", "Ca.", taxa)) %>%
  mutate(OTU_name = paste(rank, ")   ", OTU, "\n", taxa, " (", taxa_level, ")", sep="")) %>%
  select(OTU, landuse, OTU_name) #%>%


# Get OTU relative abundances
Top5OTUs.counts.df = data.frame(otu_table(unfrac.rare.physeq)) %>%
  tibble::rownames_to_column(var="OTU") %>%
  tidyr::gather(key="X.Sample", value="abundance", -OTU) %>%
  mutate(X.Sample = gsub("13C.", "13C-", gsub("12C.", "12C-", gsub("H2O.", "H2O-", X.Sample)))) %>%
  group_by(X.Sample) %>%
  mutate(total_count = sum(abundance)) %>%
  ungroup %>%
  mutate(abundance = abundance/total_count*100) %>%
  filter(OTU %in% Top5OTUs.df$OTU) %>%
  left_join(unfrac_metadata.df, by = "X.Sample") %>%
  inner_join(Top5OTUs.df, by = c("OTU", "landuse")) %>%
  mutate(treattype = ifelse(substrate == "H2O-Con", "Water control", "Carbon ammended")) %>%
  group_by(OTU, OTU_name, day, landuse, treattype) %>%
  summarize(mean_abd = mean(abundance),
            sd_abd = sd(abundance),
            n_sam = n()) %>%
  as.data.frame %>%
  mutate(SE_abd = sd_abd/sqrt(n_sam)) %>%
  arrange(landuse, day)

# Plot
ag.top5.plot = ggplot(data=filter(Top5OTUs.counts.df, landuse == "Cropland"), aes(x=day, y=mean_abd, shape=treattype)) +
  geom_errorbar(aes(ymin=mean_abd-SE_abd, ymax=mean_abd+SE_abd), width=0.2)+
  geom_point() +
  geom_line() +
  ggtitle("Cropland") +
  theme_bw() +
  theme(legend.position = "none",
        axis.title = element_blank(),
        axis.text = element_text(size=10),
        strip.text = element_text(size=10),
        plot.title = element_text(hjust = 0.5, size=12)) +
  facet_wrap(~ OTU_name, ncol=1, scales="free_y")

m.top5.plot = ggplot(data=filter(Top5OTUs.counts.df, landuse == "Old-field"), aes(x=day, y=mean_abd, shape=treattype)) +
  geom_errorbar(aes(ymin=mean_abd-SE_abd, ymax=mean_abd+SE_abd), width=0.2)+
  geom_point() +
  geom_line() +
  ggtitle("Old-field") +
  theme_bw() +
  theme(legend.position = "none",
        axis.title = element_blank(),
        axis.text = element_text(size=10),
        strip.text = element_text(size=10),
        plot.title = element_text(hjust = 0.5, size=12)) +
  facet_wrap(~ OTU_name, ncol=1, scales="free_y")

f.top5.plot = ggplot(data=filter(Top5OTUs.counts.df, landuse == "Forest"), aes(x=day, y=mean_abd, shape=treattype)) +
  geom_errorbar(aes(ymin=mean_abd-SE_abd, ymax=mean_abd+SE_abd), width=0.2)+
  geom_point() +
  geom_line() +
  ggtitle("Forest") +
  theme_bw() +
  theme(legend.position = "none",
        axis.title = element_blank(),
        axis.text = element_text(size=10),
        strip.text = element_text(size=10),
        plot.title = element_text(hjust = 0.5, size=12)) +
  facet_wrap(~ OTU_name, ncol=1, scales="free_y")

top5.plot = cowplot::plot_grid(ag.top5.plot, m.top5.plot, f.top5.plot, nrow=1)
top5.plot

# Add axes

y.grob <- textGrob("Relative abundance (% of reads)", 
                   gp=gpar(fontface="bold", fontsize=14), rot=90)
x.grob <- textGrob("Time since substrate addition (days)", 
                   gp=gpar(fontface="bold", fontsize=14))

top5OTU_abund.plot = arrangeGrob(top5.plot, left = y.grob, bottom = x.grob)

grid.arrange(top5OTU_abund.plot)

#ggsave(top5OTU_abund.plot, filename = "/Users/sambarnett/Documents/Dissertation/figures/figS2_15.tiff", 
#       device = "tiff", width = 6.69291, height = 6.69291, units = "in")

DNA concentration

landuse.conv = data.frame(ecosystem = c("agriculture", "meadow", "forest"),
                          habitat = c("A", "M", "F"),
                          landuse = factor(c("Cropland", "Old-field", "Forest"), levels = c("Cropland", "Old-field", "Forest")))

landuse.col = c("Cropland"="#00BA38", "Old-field"="#619CFF", "Forest"="#F8766D")

perc_water.df = read.table("/Users/sambarnett/Documents/Buckley Lab/FullCyc2/Soil_drying.txt", header = TRUE) %>%
  rename(X.Sample = sample)

perc_water.sum = perc_water.df %>%
  filter(!(is.na(Perc_water))) %>%
  group_by(habitat, day) %>%
  summarize(n_samples = n(),
            mean_Perc_water = mean(Perc_water),
            sd_Perc_water = sd(Perc_water)) %>%
  as.data.frame %>%
  mutate(SE_Perc_water = sd_Perc_water/sqrt(n_samples))

ggplot(data=perc_water.sum, aes(x=day, y=mean_Perc_water, color=habitat)) +
  geom_line() +
  geom_point() +
  geom_errorbar(aes(ymin=mean_Perc_water-SE_Perc_water, ymax=mean_Perc_water+SE_Perc_water)) +
  labs(x="Sampling day", y="Mean % water content", color="Land-use") +
  theme_bw() +
  theme(legend.position = "top",
        legend.title=element_text(size=12),
        legend.text=element_text(size=12),
        axis.title = element_text(size=12),
        axis.text = element_text(size=12))

DNA_conc.df = data.frame(sample_data(unfrac.physeq)) %>%
  mutate(X.Sample = ifelse(X.Sample == "MR.F.12C-Con.D0.R2_primer2", "MR.F.12C-Con.D0.R2",
                           ifelse(X.Sample == "MR.M.12C-Con.D0.R2_primer2", "MR.M.12C-Con.D0.R2",
                                  as.character(X.Sample)))) %>%
  mutate(DNA_conc = ifelse(DNA_conc__ng_ul != "TBD", 
                           as.numeric(as.character(DNA_conc__ng_ul)), NA)) %>%
  select(X.Sample, DNA_conc) %>%
  full_join(perc_water.df, by="X.Sample") %>%
  arrange(habitat, day, substrate, microcosm_replicate) %>%
  mutate(ext_soil_mass = 0.25*(1-Perc_water)) %>%
  mutate(DNA_pG_soil = (DNA_conc*100)/ext_soil_mass)

DNA_conc.sum = DNA_conc.df %>%
  filter(!(is.na(DNA_pG_soil))) %>%
  group_by(habitat, day) %>%
  summarize(n_samples = n(),
            mean_DNA_pG_soil = mean(DNA_pG_soil),
            sd_DNA_pG_soil = sd(DNA_pG_soil)) %>%
  as.data.frame %>%
  mutate(SE_DNA_pG_soil = sd_DNA_pG_soil/sqrt(n_samples)) %>%
  left_join(landuse.conv, by="habitat")
  

ggplot(data=DNA_conc.sum, aes(x=day, y=mean_DNA_pG_soil, color=landuse)) +
  geom_line() +
  geom_point() +
  geom_errorbar(aes(ymin=mean_DNA_pG_soil-SE_DNA_pG_soil, ymax=mean_DNA_pG_soil+SE_DNA_pG_soil)) +
  scale_color_manual(values=landuse.col) +
  labs(x="Time since SOM addition", y="Mean ng DNA per g soil", color="Land use") +
  theme_bw() +
  theme(legend.position = "right",
        legend.title=element_text(size=12),
        legend.text=element_text(size=12),
        axis.title = element_text(size=12),
        axis.text = element_text(size=12))

Session info

sessionInfo()
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS Catalina 10.15.7
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] gridExtra_2.3   ggplot2_3.3.3   lsmeans_2.30-0  emmeans_1.4.6  
##  [5] vegan_2.5-6     lattice_0.20-41 permute_0.9-5   ape_5.3        
##  [9] phyloseq_1.30.0 dplyr_1.0.6    
## 
## loaded via a namespace (and not attached):
##  [1] Biobase_2.46.0      tidyr_1.0.2         jsonlite_1.6.1     
##  [4] splines_3.6.3       foreach_1.5.0       assertthat_0.2.1   
##  [7] stats4_3.6.3        yaml_2.2.1          pillar_1.6.0       
## [10] glue_1.4.0          digest_0.6.25       XVector_0.26.0     
## [13] colorspace_1.4-1    sandwich_2.5-1      cowplot_1.0.0      
## [16] htmltools_0.4.0     Matrix_1.2-18       plyr_1.8.6         
## [19] pkgconfig_2.0.3     zlibbioc_1.32.0     purrr_0.3.4        
## [22] xtable_1.8-4        mvtnorm_1.1-0       scales_1.1.0       
## [25] tibble_3.0.1        mgcv_1.8-31         generics_0.1.0     
## [28] farver_2.0.3        IRanges_2.20.2      ellipsis_0.3.0     
## [31] TH.data_1.0-10      withr_2.2.0         BiocGenerics_0.32.0
## [34] cli_2.0.2           survival_3.1-12     magrittr_1.5       
## [37] crayon_1.3.4        estimability_1.3    evaluate_0.14      
## [40] fansi_0.4.1         nlme_3.1-147        MASS_7.3-51.6      
## [43] tools_3.6.3         data.table_1.12.8   lifecycle_1.0.0    
## [46] multcomp_1.4-13     stringr_1.4.0       Rhdf5lib_1.8.0     
## [49] S4Vectors_0.24.4    munsell_0.5.0       cluster_2.1.0      
## [52] Biostrings_2.54.0   ade4_1.7-15         compiler_3.6.3     
## [55] rlang_0.4.11        rhdf5_2.30.1        iterators_1.0.12   
## [58] biomformat_1.14.0   rstudioapi_0.11     igraph_1.2.5       
## [61] labeling_0.3        rmarkdown_2.1       gtable_0.3.0       
## [64] codetools_0.2-16    multtest_2.42.0     reshape2_1.4.4     
## [67] R6_2.4.1            zoo_1.8-8           knitr_1.28         
## [70] utf8_1.1.4          stringi_1.4.6       parallel_3.6.3     
## [73] Rcpp_1.0.4.6        vctrs_0.3.8         tidyselect_1.1.1   
## [76] xfun_0.13           coda_0.19-3